[ { "id": "identity_47378", "image": "DocVQA/images/spnf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source URL of the document?" }, { "from": "gpt", "value": "The answer is https://www.industrydocuments.ucsf.edu/docs/snf02227" } ], "clip_score": 20.5, "loss": 2.386956214904785 }, { "id": "identity_37048", "image": "DVQA/images/bar_train_00016378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.703125, "loss": 3.4213249683380127 }, { "id": "identity_233301", "image": "Super-CLEVR/images/superCLEVR_new_000000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles that are either small red cars or big gray metal trucks, and how does this compare to the number of bicycles?" }, { "from": "gpt", "value": "The answer is The number of bicycles is greater" } ], "clip_score": 24.09375, "loss": 2.990757703781128 }, { "id": "identity_132051", "image": "A-OKVQA/images/151648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the cows?" }, { "from": "gpt", "value": "The answer is Black and white" } ], "clip_score": 29.453125, "loss": 2.5481760501861572 }, { "id": "identity_280316", "image": "GEOS/images/001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following statements is false regarding the construction of the perpendicular bisector of AB as shown in the diagram on the right? \n(A) AC = CB \n(B) AC = 2AB \n(C) CB = 1/2*AB \n(D) AC + CB = AB" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.359375, "loss": 2.8616385459899902 }, { "id": "identity_42466", "image": "ChartQA/images/10669853002939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Panama surpass Vietnam in the share of pregnant women receiving antiretroviral therapy?" }, { "from": "gpt", "value": "The answer is 2014" } ], "clip_score": 29.625, "loss": 1.610897183418274 }, { "id": "identity_79881", "image": "Geometry3K/images/392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment AG?" }, { "from": "gpt", "value": "The answer is 18.5 inches" } ], "clip_score": 29.15625, "loss": 2.568382978439331 }, { "id": "identity_173548", "image": "TabMWP/images/11122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which park has the highest number of basketball hoops?" }, { "from": "gpt", "value": "The answer is Heron Park and Mooreland Park, both with 8 hoops" } ], "clip_score": 39.1875, "loss": 1.8789271116256714 }, { "id": "identity_175234", "image": "IconQA/images/94658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock were to display the time 3 hours earlier, what would it show?" }, { "from": "gpt", "value": "The answer is 11:00" } ], "clip_score": 31.125, "loss": 1.5289112329483032 }, { "id": "identity_264162", "image": "IconQA/images/103963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the nail in inches when measured to the nearest inch using the ruler?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.78125, "loss": 2.7283551692962646 }, { "id": "identity_89880", "image": "GeoQA+/images/432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠B = ∠D and ∠D = 72° in quadrilateral ABCD, what is the measure of ∠AEC?" }, { "from": "gpt", "value": "The answer is 18°" } ], "clip_score": 33.46875, "loss": 2.2650740146636963 }, { "id": "identity_280623", "image": "DVQA/images/bar_train_00071482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which category does the color darkorange belong?" }, { "from": "gpt", "value": "The answer is sport" } ], "clip_score": 24.75, "loss": 3.957508087158203 }, { "id": "identity_200679", "image": "PMC-VQA/images/PMC6108968_Fig4_358750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any sign of a fracture in the bone structures visible in this image?" }, { "from": "gpt", "value": "The answer is No visible fracture" } ], "clip_score": 27.078125, "loss": 3.351450204849243 }, { "id": "identity_267979", "image": "IconQA/images/100608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of birds present? \n(A) 1\n(B) 5\n(C) 2\n(D) 4\n(E) 3" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 24.4375, "loss": 3.401294231414795 }, { "id": "identity_40825", "image": "DVQA/images/bar_train_00022114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of 'flood' across all labels?" }, { "from": "gpt", "value": "The answer is 6.4" } ], "clip_score": 26.15625, "loss": 2.2221951484680176 }, { "id": "identity_286124", "image": "TabMWP/images/2258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When farmers compared the number of tractors they own, what is the range of the numbers?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 37.34375, "loss": 3.572136640548706 }, { "id": "identity_91333", "image": "UniGeo/images/2443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle of elevation from point B to point A?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 34.0, "loss": 1.6065664291381836 }, { "id": "identity_307152", "image": "PMC-VQA/images/PMC8373912_Fig4_37009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the images in the left panel depict?\nChoices:\n(A) High magnification view of tumor niches,\n(B) Low magnification view of immune niches,\n(C) High magnification view of immune niches,\n(D) Low magnification view of adjacent indolent and aggressive lesions." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.40625, "loss": 3.8248889446258545 }, { "id": "identity_254081", "image": "ScienceQA/images/8517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and list its neighboring countries within a 1000 km radius" }, { "from": "gpt", "value": "The answer is Kiribati, Marshall Islands, Solomon Islands" } ], "clip_score": 35.25, "loss": 2.754929780960083 }, { "id": "identity_99182", "image": "IconQA/images/71013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the shape is a pie and each green part represents a flavor, how many different flavors are there?" }, { "from": "gpt", "value": "The answer is 1 flavor" } ], "clip_score": 34.1875, "loss": 2.5348613262176514 }, { "id": "identity_43207", "image": "FigureQA/images/76239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the y-axis for the color Lime Green?" }, { "from": "gpt", "value": "The answer is 72.5" } ], "clip_score": 24.28125, "loss": 2.098602294921875 }, { "id": "identity_83180", "image": "GeoQA+/images/2429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠MON = 90° and BE bisects ∠NBA, what is the measure of ∠ABO?" }, { "from": "gpt", "value": "The answer is 135°" } ], "clip_score": 30.84375, "loss": 1.6788355112075806 }, { "id": "identity_227572", "image": "TextVQA/images/203d9cc8d63c0287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of days between the date the email was sent and the date on the bottom right?" }, { "from": "gpt", "value": "The answer is 67 days" } ], "clip_score": 17.03125, "loss": 2.091970205307007 }, { "id": "identity_182954", "image": "PMC-VQA/images/PMC6060136_Fig2_345899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of the tooth located at the intersection of the blue line and the yellow curve?" }, { "from": "gpt", "value": "The answer is Tooth number 11" } ], "clip_score": 33.28125, "loss": 2.444800615310669 }, { "id": "identity_287247", "image": "PlotQA/images/79947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the comparison between the number of refugees by country of origin in 1998 and 2002 in terms of ratio?" }, { "from": "gpt", "value": "The answer is 0.022619097763362957" } ], "clip_score": 23.40625, "loss": 2.620481491088867 }, { "id": "identity_204520", "image": "A-OKVQA/images/57641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the backpack of the person standing on the left side near the salon?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.53125, "loss": 4.105630397796631 }, { "id": "identity_265938", "image": "IconQA/images/62205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.84375, "loss": 4.776045799255371 }, { "id": "identity_163446", "image": "GeoQA+/images/4239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line a is parallel to line b and angle 1 is 42 degrees, what is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is 42 degrees" } ], "clip_score": 32.03125, "loss": 1.6071739196777344 }, { "id": "identity_91775", "image": "TabMWP/images/3129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much would it cost to buy one of each clothing item listed?" }, { "from": "gpt", "value": "The answer is $37.92" } ], "clip_score": 31.90625, "loss": 1.8733646869659424 }, { "id": "identity_283117", "image": "A-OKVQA/images/215738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the man in pink engaged in on the street?\nChoices:\n(A) driving\n(B) selling\n(C) cleaning\n(D) crossing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.59375, "loss": 4.463812828063965 }, { "id": "identity_50248", "image": "DVQA/images/bar_train_00036569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store sold more units of the item \"strike\"?" }, { "from": "gpt", "value": "The answer is grace with 8 units" } ], "clip_score": 34.09375, "loss": 2.936863899230957 }, { "id": "identity_136738", "image": "DVQA/images/bar_train_00139710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the least total units sold across all stores?" }, { "from": "gpt", "value": "The answer is phone" } ], "clip_score": 30.03125, "loss": 3.409621000289917 }, { "id": "identity_81718", "image": "Geometry3K/images/1445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle WXY?" }, { "from": "gpt", "value": "The answer is 114 degrees" } ], "clip_score": 33.40625, "loss": 2.373106002807617 }, { "id": "identity_268109", "image": "PMC-VQA/images/PMC6525117_Fig1_482534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the images of the teenage girl?\nChoices:\n(A) Enlarged cervical lymph nodes \n(B) Abdominal swelling \n(C) Broken bones \n(D) Dental problems" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.453125, "loss": 3.686455249786377 }, { "id": "identity_42218", "image": "FigureQA/images/1895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.15625, "loss": 2.161271095275879 }, { "id": "identity_201553", "image": "A-OKVQA/images/384146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the person on the rightmost horse?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 18.96875, "loss": 3.3691086769104004 }, { "id": "identity_254679", "image": "TabMWP/images/37390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Alan need to buy a mystery novel, a poetry book, and a science book?" }, { "from": "gpt", "value": "The answer is 58" } ], "clip_score": 36.5, "loss": 3.0758349895477295 }, { "id": "identity_270976", "image": "FigureQA/images/50819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.796875, "loss": 3.7879421710968018 }, { "id": "identity_9897", "image": "TextVQA/images/e9e53a549006b8cb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what numbers are combined in the middle?" }, { "from": "gpt", "value": "The answer is 7 6" } ], "clip_score": 28.03125, "loss": 1.9072415828704834 }, { "id": "identity_246786", "image": "VizWiz/images/VizWiz_train_00019201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name and the main image depicted on this glass mug?" }, { "from": "gpt", "value": "The answer is Budweiser, horses pulling a carriage" } ], "clip_score": 36.03125, "loss": 2.6404306888580322 }, { "id": "identity_132333", "image": "TextVQA/images/0095c71d8fbe4462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color scheme of the team uniform for the player with the ball?" }, { "from": "gpt", "value": "The answer is White, black, and gold" } ], "clip_score": 21.28125, "loss": 2.577319622039795 }, { "id": "identity_184974", "image": "PMC-VQA/images/PMC6723156_jcm-08-01197-f006_33060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are indicated by the white arrowheads in the image?" }, { "from": "gpt", "value": "The answer is Neutrophils" } ], "clip_score": 26.03125, "loss": 3.078444719314575 }, { "id": "identity_88536", "image": "GeoQA+/images/5840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the square S3 in the given right triangle ABC?" }, { "from": "gpt", "value": "The answer is 4 times the square root of 31 or approximately 22.24" } ], "clip_score": 32.25, "loss": 1.8848729133605957 }, { "id": "identity_137686", "image": "FigureQA/images/66366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of Saddle Brown at x-axis label 100?" }, { "from": "gpt", "value": "The answer is Approximately 79" } ], "clip_score": 26.453125, "loss": 2.73824143409729 }, { "id": "identity_17488", "image": "DocVQA/images/jfvh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is shortest interval for Baltimore?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 20.625, "loss": 2.661620616912842 }, { "id": "identity_247512", "image": "PMC-VQA/images/PMC4008279_fig1_285387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key anatomical structures visible in the transesophageal echocardiogram image, and how do they help in diagnosing cardiac conditions?" }, { "from": "gpt", "value": "The answer is Left atrium, left ventricle, mitral valve, aorta" } ], "clip_score": 32.46875, "loss": 2.206627368927002 }, { "id": "identity_190137", "image": "PMC-VQA/images/PMC4861721_F5_29095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many black arrows are pointing to the green objects?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.15625, "loss": 2.3405494689941406 }, { "id": "identity_201614", "image": "PMC-VQA/images/PMC7353682_Fig1_211897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a presence of a foreign object in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.234375, "loss": 2.848686933517456 }, { "id": "identity_226320", "image": "VQA-RAD/images/synpic48714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential causes and complications of a subarachnoid hemorrhage as seen in the image?" }, { "from": "gpt", "value": "The answer is Trauma, aneurysm, AVM, hypertension, blood disorders, rebleeding, vasospasm, hydrocephalus, seizures, cognitive deficits, death" } ], "clip_score": 26.984375, "loss": 1.7507426738739014 }, { "id": "identity_68001", "image": "ScienceQA/images/1026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of membrane is shown in the diagram that allows the movement of solute particles?" }, { "from": "gpt", "value": "The answer is Selectively permeable membrane" } ], "clip_score": 29.359375, "loss": 1.6667851209640503 }, { "id": "identity_130837", "image": "A-OKVQA/images/181453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any luggage present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.78125, "loss": 3.0771281719207764 }, { "id": "identity_207971", "image": "A-OKVQA/images/484761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible audience watching the planes?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.984375, "loss": 2.4043283462524414 }, { "id": "identity_197496", "image": "PMC-VQA/images/PMC5758251_F5_260883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the lumbar spine show any signs of osteophytes or bone spurs in this image?" }, { "from": "gpt", "value": "The answer is No osteophytes or bone spurs are detected" } ], "clip_score": 31.609375, "loss": 2.1121106147766113 }, { "id": "identity_35174", "image": "PlotQA/images/49122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years are there in the graph ?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.234375, "loss": 2.3128013610839844 }, { "id": "identity_201185", "image": "VQA-AS/images/474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of toy structure is placed on the table?" }, { "from": "gpt", "value": "The answer is A toy house" } ], "clip_score": 27.9375, "loss": 3.1371827125549316 }, { "id": "identity_250343", "image": "VQA2.0/images/290560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are there in the carriage, and how many wheels are visible on the carriage?" }, { "from": "gpt", "value": "The answer is Three people and four wheels" } ], "clip_score": 29.078125, "loss": 2.953275680541992 }, { "id": "identity_222863", "image": "TextVQA/images/d4fbce59a74dc74c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cooking utensil being used to handle the vegetables?" }, { "from": "gpt", "value": "The answer is Tongs" } ], "clip_score": 23.15625, "loss": 2.550413131713867 }, { "id": "identity_134762", "image": "PlotQA/images/135230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does any country have a decrease in value added by service sector from 1962 to 1963?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.9375, "loss": 3.1165623664855957 }, { "id": "identity_108822", "image": "VQA2.0/images/206363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different airline companies can be identified in this image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.640625, "loss": 2.2313690185546875 }, { "id": "identity_23052", "image": "GeoQA+/images/1740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 已知:如图,PA,PB分别与⊙O相切于A,B点,C为⊙O上一点,∠ACB=65°,则∠APB等于()\nChoices:\n(A) 65°\n(B) 50°\n(C) 45°\n(D) 40°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.984375, "loss": 2.6589386463165283 }, { "id": "identity_230541", "image": "PMC-VQA/images/PMC5627507_f0020_224057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the changes observed in the sample between 40 minutes and 60 minutes. What can be inferred about the sample's behavior over time?" }, { "from": "gpt", "value": "The answer is The bright spots grow larger and more dispersed over time" } ], "clip_score": 22.703125, "loss": 3.5293943881988525 }, { "id": "identity_143226", "image": "DocVQA/images/jggn0226_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much pain does the patient have when standing upright?" }, { "from": "gpt", "value": "The answer is None" } ], "clip_score": 19.328125, "loss": 3.2855563163757324 }, { "id": "identity_322981", "image": "TextVQA/images/8ca1f379a91ad12d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time?" }, { "from": "gpt", "value": "The answer is 11:30" } ], "clip_score": 14.515625, "loss": 1.7611446380615234 }, { "id": "identity_299924", "image": "MapQA/images/map_2606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value in the United States of America?" }, { "from": "gpt", "value": "The answer is 10.9%-13.1%" } ], "clip_score": 25.171875, "loss": 2.0248749256134033 }, { "id": "identity_249398", "image": "TextVQA/images/ff1f0006533d8e6e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the alcohol by volume (ABV) percentage of Guinness Foreign Extra Stout beer shown in the image?" }, { "from": "gpt", "value": "The answer is YOUR CORRECT ANSWER" } ], "clip_score": 30.15625, "loss": 2.5703001022338867 }, { "id": "identity_115643", "image": "TextVQA/images/9caf43e961f1956b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What Roman numeral is at the 4 o'clock position?" }, { "from": "gpt", "value": "The answer is IV" } ], "clip_score": 18.09375, "loss": 2.481492519378662 }, { "id": "identity_126813", "image": "A-OKVQA/images/140151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bunches of bananas are hanging from the top of the stall?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 33.8125, "loss": 2.4903271198272705 }, { "id": "identity_245055", "image": "DocVQA/images/nhcn0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states are represented by the farmers who rely on Chlorpyrifos, and which state has the highest number of farmers listed?" }, { "from": "gpt", "value": "The answer is 10 states, Idaho has the highest number of farmers" } ], "clip_score": 28.140625, "loss": 1.9382585287094116 }, { "id": "identity_206687", "image": "PMC-VQA/images/PMC5728979_F1_252867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of this brain image?" }, { "from": "gpt", "value": "The answer is Axial orientation" } ], "clip_score": 33.15625, "loss": 3.6018545627593994 }, { "id": "identity_332589", "image": "GeoQA+/images/1376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图AB是⊙O的直径,点D在AB的延长线上,DC切⊙O于点C,若∠A=35°,则∠D等于多少?\nChoices:\n(A) 50°\n(B) 40°\n(C) 30°\n(D) 20°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.0625, "loss": 2.628122329711914 }, { "id": "identity_214082", "image": "Super-CLEVR/images/superCLEVR_new_001014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the blue object have?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.9375, "loss": 4.058200836181641 }, { "id": "identity_80566", "image": "GeoQA+/images/5997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AC is the diameter of circle O and triangle ABC is inscribed in circle O, with AB equal to BC, and angle DBC equals 32 degrees, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 35.71875, "loss": 2.1469037532806396 }, { "id": "identity_326134", "image": "TabMWP/images/6750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Someone hosts an annual art contest for kids and keeps a record of the number of entries each year. According to the table, what was the rate of change between these two years?" }, { "from": "gpt", "value": "The answer is -2" } ], "clip_score": 37.0, "loss": 3.420152425765991 }, { "id": "identity_206724", "image": "PMC-VQA/images/PMC7581161_F2_280580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a mass effect in the brain MRI?" }, { "from": "gpt", "value": "The answer is Yes, there is evidence of a mass effect" } ], "clip_score": 28.171875, "loss": 1.9099440574645996 }, { "id": "identity_59716", "image": "DVQA/images/bar_train_00064595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of units sold for item shift greater than the number of units sold for item trade?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 34.90625, "loss": 3.2727344036102295 }, { "id": "identity_207130", "image": "TextVQA/images/9061324f2a0a8cc0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is being maintained in the image?" }, { "from": "gpt", "value": "The answer is Helicopter" } ], "clip_score": 24.59375, "loss": 2.3857855796813965 }, { "id": "identity_186306", "image": "A-OKVQA/images/437516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the skateboarder's cap?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.6875, "loss": 3.467055559158325 }, { "id": "identity_243019", "image": "VQA2.0/images/548020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow stripes are visible on the sidewalk, and what is the total number of stripes (both yellow and non-yellow) visible on the sidewalk?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 18.375, "loss": 3.475627899169922 }, { "id": "identity_299710", "image": "IconQA/images/23739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on? \nOptions:\n(A) white\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.765625, "loss": 4.021348476409912 }, { "id": "identity_314825", "image": "TabMWP/images/22226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest number of passengers counted by the train conductor?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 34.03125, "loss": 3.178541421890259 }, { "id": "identity_216919", "image": "TextVQA/images/dc62484d529ec714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main subject of the photograph on the cover?" }, { "from": "gpt", "value": "The answer is A lighthouse" } ], "clip_score": 26.453125, "loss": 2.8756675720214844 }, { "id": "identity_289025", "image": "A-OKVQA/images/37563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Of what material is this statue composed?\nChoices:\n(A) metal\n(B) wood\n(C) clay\n(D) pic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.59375, "loss": 4.35849666595459 }, { "id": "identity_214274", "image": "TextVQA/images/a19a84eee2925361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the advertisement on the sideline board behind the player with the ball?" }, { "from": "gpt", "value": "The answer is Gatorade" } ], "clip_score": 21.53125, "loss": 2.4376626014709473 }, { "id": "identity_18935", "image": "GeoQA+/images/3444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点C是线段AB上一点,点P是AC的中点,点Q是BC的中点,已知线段AC=8cm,线段BC=4cm,则线段PQ为()\nChoices:\n(A) 2cm\n(B) 4cm\n(C) 6cm\n(D) 12cm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.09375, "loss": 2.4400532245635986 }, { "id": "identity_19929", "image": "VQA2.0/images/130081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many glass panels are there?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 16.359375, "loss": 2.221916437149048 }, { "id": "identity_69374", "image": "TQA/images/question_images/human_system_digestive_6096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"gullet\"?" }, { "from": "gpt", "value": "The answer is Transport food and liquids from the mouth to the stomach" } ], "clip_score": 34.21875, "loss": 1.7941482067108154 }, { "id": "identity_149921", "image": "DocVQA/images/xnpv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the publication month and year of the Fact Book?" }, { "from": "gpt", "value": "The answer is March 1963" } ], "clip_score": 26.84375, "loss": 1.9494831562042236 }, { "id": "identity_54450", "image": "DocVQA/images/ppxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What instruction is given regarding the use of the list?" }, { "from": "gpt", "value": "The answer is Do not use, for records only" } ], "clip_score": 21.515625, "loss": 2.3865575790405273 }, { "id": "identity_123143", "image": "VizWiz/images/VizWiz_train_00023757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the mobile network provider shown in the image?" }, { "from": "gpt", "value": "The answer is MTN SYRIA" } ], "clip_score": 25.375, "loss": 2.0950584411621094 }, { "id": "identity_12984", "image": "PMC-VQA/images/PMC6886653_FIG3_76696.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the medical condition of the patient? \nChoices:\n(A) Breast Cancer \n(B) Prostate Cancer \n(C) Bile Duct Carcinoma \n(D) Skin Cancer " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.171875, "loss": 4.085179805755615 }, { "id": "identity_159118", "image": "GeoQA+/images/1190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diameter of the circle in the given image?" }, { "from": "gpt", "value": "The answer is Diameter of the circle = 10" } ], "clip_score": 33.9375, "loss": 2.1447598934173584 }, { "id": "identity_9645", "image": "TextVQA/images/c5e77a36168f68d4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who wrote this?" }, { "from": "gpt", "value": "The answer is h. l. hunley" } ], "clip_score": 29.375, "loss": 2.0561444759368896 }, { "id": "identity_99107", "image": "IconQA/images/16407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more hearts on the top row or on the bottom row?" }, { "from": "gpt", "value": "The answer is Bottom row" } ], "clip_score": 28.75, "loss": 1.6645749807357788 }, { "id": "identity_81632", "image": "GEOS/images/039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 4 if the measure of angle 5 is 110 degrees?" }, { "from": "gpt", "value": "The answer is 110 degrees" } ], "clip_score": 30.5625, "loss": 1.7006850242614746 }, { "id": "identity_193057", "image": "A-OKVQA/images/409985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What food item is served on top of the pizza?" }, { "from": "gpt", "value": "The answer is French fries" } ], "clip_score": 26.984375, "loss": 2.122370958328247 }, { "id": "identity_26892", "image": "A-OKVQA/images/85081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is this man taking part in?\nChoices:\n(A) hiking\n(B) roller skating\n(C) skiboarding\n(D) skiing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.875, "loss": 4.2643351554870605 }, { "id": "identity_199539", "image": "PMC-VQA/images/PMC7060984_Fig2_130337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the presence of ascites?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 13.2109375, "loss": 3.2299530506134033 }, { "id": "identity_312653", "image": "GeoQA+/images/1511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, in quadrilateral ▱ABCD, AC and BD are diagonals, BC = 3, and the height from BC is 2. What is the area of the shaded region?\n\nChoices:\n(A) 3\n(B) 6\n(C) 12\n(D) 24" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.78125, "loss": 2.6146342754364014 }, { "id": "identity_142585", "image": "DVQA/images/bar_train_00164356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the items \"visit\" and \"night\"?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 25.84375, "loss": 2.463918685913086 }, { "id": "identity_88903", "image": "UniGeo/images/2483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AE:EB=1:2 in parallelogram ABCD and the area of triangle AEF is 6, what is the area of triangle CDF?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 35.9375, "loss": 1.7693212032318115 }, { "id": "identity_274352", "image": "ChartQA/images/multi_col_40962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what connection speed is the percentage 100%?" }, { "from": "gpt", "value": "The answer is 30 Mbps" } ], "clip_score": 24.3125, "loss": 1.651503324508667 }, { "id": "identity_139968", "image": "MapQA/images/map_11086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have the highest range of classes according to the legend?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.40625, "loss": 2.4104626178741455 }, { "id": "identity_130621", "image": "A-OKVQA/images/191230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the street on the street sign above the traffic light?" }, { "from": "gpt", "value": "The answer is Pearl St" } ], "clip_score": 24.6875, "loss": 3.343766450881958 }, { "id": "identity_68156", "image": "AI2D/images/3703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organ is labeled directly to the right of the spleen?" }, { "from": "gpt", "value": "The answer is Stomach" } ], "clip_score": 33.65625, "loss": 2.3879072666168213 }, { "id": "identity_55105", "image": "FigureQA/images/70080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are there for Dark Olive?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.890625, "loss": 2.1128451824188232 }, { "id": "identity_162143", "image": "Geometry3K/images/1806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the given trapezoid ABCD?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 35.34375, "loss": 2.8263497352600098 }, { "id": "identity_269316", "image": "ScienceQA/images/1910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Washington?\n(A) Orlando\n(B) Olympia\n(C) Denver\n(D) Spokane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.15625, "loss": 4.275156021118164 }, { "id": "identity_333059", "image": "A-OKVQA/images/309412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of zone is this place?\nChoices:\n(A) tourist\n(B) residential\n(C) shopping\n(D) business" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.1015625, "loss": 4.228081703186035 }, { "id": "identity_125825", "image": "PMC-VQA/images/PMC3766644_F4_230375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of surgical hardware in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.578125, "loss": 2.9178967475891113 }, { "id": "identity_2326", "image": "AI2D/images/3002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the region that represents the leaflet?\nChoices:\n(A) b\n(B) a\n(C) d\n(D) c" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.109375, "loss": 4.280797481536865 }, { "id": "identity_135136", "image": "PlotQA/images/56884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in aid flow from UN agencies between Middle income and Sub-Saharan Africa (developing only) in 2009?" }, { "from": "gpt", "value": "The answer is The difference in US$" } ], "clip_score": 28.484375, "loss": 4.00876522064209 }, { "id": "identity_30697", "image": "PMC-VQA/images/PMC6542951_F6_487708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the center of the image? \nChoices:\n(A) Automatic lesion segmentation \n(B) Original FLAIR MRI dataset \n(C) Manual lesion segmentation \n(D) Multiple lesions in a single image " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.28125, "loss": 4.50724458694458 }, { "id": "identity_70806", "image": "AI2D/images/3631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the blood vessels labeled that carry blood away from the heart to the lungs?" }, { "from": "gpt", "value": "The answer is pulmonary arteries" } ], "clip_score": 31.296875, "loss": 1.8359391689300537 }, { "id": "identity_159811", "image": "GeoQA+/images/1531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the right triangle ABC, if angle C is 90 degrees, AB is 2, and BC is 1, what is the measure of angle A in degrees?" }, { "from": "gpt", "value": "The answer is arcsin(1/2) which is 30 degrees" } ], "clip_score": 30.375, "loss": 1.8139983415603638 }, { "id": "identity_283358", "image": "ScienceQA/images/4925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Tucson\n(B) Salt Lake City\n(C) Phoenix\n(D) Carson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.859375, "loss": 4.957512855529785 }, { "id": "identity_37772", "image": "CLEVR-Math/images/CLEVR_train_063517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 1 blue matte things. How many objects exist?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 26.46875, "loss": 2.1745259761810303 }, { "id": "identity_160443", "image": "UniGeo/images/252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given image of circle O with inscribed quadrilateral ABCD, if ∠BCD = 110°, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 110°" } ], "clip_score": 35.90625, "loss": 1.6092709302902222 }, { "id": "identity_182876", "image": "A-OKVQA/images/225299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the skier's outfit?" }, { "from": "gpt", "value": "The answer is Blue and white" } ], "clip_score": 24.78125, "loss": 3.038414716720581 }, { "id": "identity_227635", "image": "PMC-VQA/images/PMC6882616_F1_75426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which imaging modality is used to demonstrate soft-tissue masses, and what are the key differences between T1-weighted imaging (T1WI) and T2-weighted imaging (T2WI) in terms of tissue contrast and clinical applications?" }, { "from": "gpt", "value": "The answer is T1WI is used for anatomical detail; T2WI for pathology detection" } ], "clip_score": 26.46875, "loss": 2.9164741039276123 }, { "id": "identity_300317", "image": "IconQA/images/10823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a triangle on this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.4375, "loss": 3.685265302658081 }, { "id": "identity_181556", "image": "CLEVR-Math/images/CLEVR_train_032220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cube in the image?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 27.078125, "loss": 3.2856972217559814 }, { "id": "identity_219678", "image": "PMC-VQA/images/PMC8470686_brainsci-11-01169-f002_69305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique was likely used to create this 3D reconstruction?" }, { "from": "gpt", "value": "The answer is The imaging technique is likely computed tomography (CT)" } ], "clip_score": 28.984375, "loss": 1.942478895187378 }, { "id": "identity_238448", "image": "A-OKVQA/images/464149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hours are there between the current time shown on the clock and 6 PM on the same day?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 9.40625, "loss": 2.7625818252563477 }, { "id": "identity_10885", "image": "PMC-VQA/images/PMC6092347_Fig5_354068.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of pearl is depicted in the radiographs? \nChoices:\n(A) Freshwater pearl \n(B) Conch pearl \n(C) Akoya pearl \n(D) Saltwater pearl. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.234375, "loss": 4.128539085388184 }, { "id": "identity_163476", "image": "GeoQA+/images/5580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AD=3, DC=4, and DE=2.5, what is the measure of angle ADE?" }, { "from": "gpt", "value": "The answer is approximately 50.19 degrees" } ], "clip_score": 29.125, "loss": 2.0844242572784424 }, { "id": "identity_83670", "image": "Geometry3K/images/1677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For trapezoid QRTU, if the length of QR is 12 and the length of UT is 22, what is the length of RT?" }, { "from": "gpt", "value": "The answer is 29" } ], "clip_score": 32.15625, "loss": 2.4356112480163574 }, { "id": "identity_229092", "image": "TQA/images/question_images/cycle_nitrogen_6721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of steps leading to the nitrogen in plant proteins, starting from ammonia and nitrates in the soil, as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Ammonia and nitrates → Absorption → Plant proteins → Feeding → Excretion → Decay" } ], "clip_score": 22.796875, "loss": 1.7730978727340698 }, { "id": "identity_41834", "image": "PlotQA/images/75577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the population of the largest city in India as a percentage of urban population?" }, { "from": "gpt", "value": "The answer is 1.5e+7 or 15000000" } ], "clip_score": 31.0625, "loss": 1.8161953687667847 }, { "id": "identity_318387", "image": "TQA/images/question_images/aquifers_6509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many kinds of aquifers are there?\nChoices:\n(A) 1\n(B) 3\n(C) 4\n(D) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.109375, "loss": 2.951298236846924 }, { "id": "identity_11862", "image": "FigureQA/images/97287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Violet have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 3.9515163898468018 }, { "id": "identity_212449", "image": "VQA2.0/images/542027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles are visible on the table?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 13.4453125, "loss": 4.65197229385376 }, { "id": "identity_166525", "image": "TabMWP/images/4883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of pets owned by the students?" }, { "from": "gpt", "value": "The answer is 4.2" } ], "clip_score": 34.8125, "loss": 1.9922958612442017 }, { "id": "identity_127058", "image": "VQA2.0/images/467432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 16.390625, "loss": 2.27215576171875 }, { "id": "identity_173614", "image": "IconQA/images/82333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vertices in the shape?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.15625, "loss": 2.203058958053589 }, { "id": "identity_152307", "image": "ScienceQA/images/1436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state borders the highlighted state to the east?" }, { "from": "gpt", "value": "The answer is Indiana" } ], "clip_score": 28.96875, "loss": 3.1676948070526123 }, { "id": "identity_70091", "image": "AI2D/images/3642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the vessel that carries oxygenated blood from the lungs to the heart" }, { "from": "gpt", "value": "The answer is Pulmonary veins" } ], "clip_score": 29.609375, "loss": 2.048527956008911 }, { "id": "identity_90438", "image": "Geometry3K/images/962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the quadrilateral if it is a parallelogram and x = 4?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 37.125, "loss": 2.8068482875823975 }, { "id": "identity_221003", "image": "A-OKVQA/images/180824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.296875, "loss": 2.457686185836792 }, { "id": "identity_85788", "image": "UniGeo/images/1176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠ACB = 40°, what is the measure of ∠CAB in triangle ABC?" }, { "from": "gpt", "value": "The answer is 100°" } ], "clip_score": 33.09375, "loss": 1.7280322313308716 }, { "id": "identity_153622", "image": "TQA/images/abc_question_images/seasons_10672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the Southern Hemisphere summer?" }, { "from": "gpt", "value": "The answer is H" } ], "clip_score": 32.09375, "loss": 3.1698975563049316 }, { "id": "identity_320829", "image": "PlotQA/images/136481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the percentage of exports to various countries in 1963 in North America and that in South Asia?" }, { "from": "gpt", "value": "The answer is 3.1560438564761686" } ], "clip_score": 29.796875, "loss": 2.7635979652404785 }, { "id": "identity_108796", "image": "VizWiz/images/VizWiz_train_00013502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the 'Sign In' button at the top right corner?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.03125, "loss": 3.8954529762268066 }, { "id": "identity_116375", "image": "VQA-AS/images/8408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the curtain in the window?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 21.484375, "loss": 3.6419882774353027 }, { "id": "identity_213886", "image": "A-OKVQA/images/330726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tallest object in the image?" }, { "from": "gpt", "value": "The answer is The building on the left" } ], "clip_score": 19.4375, "loss": 3.6550042629241943 }, { "id": "identity_153353", "image": "AI2D/images/2485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are there in the life cycle of a sandfly?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.15625, "loss": 2.2096107006073 }, { "id": "identity_312820", "image": "VQA2.0/images/294811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there many kites?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.015625, "loss": 2.8376479148864746 }, { "id": "identity_49334", "image": "FigureQA/images/50635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Light Green section the smallest portion of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 37.09375, "loss": 3.4200453758239746 }, { "id": "identity_332824", "image": "ScienceQA/images/9861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism belongs to the same species as the brown pelican?\nChoices:\n(A) Pelecanus occidentalis\n(B) Ardea cocoi\n(C) Strix uralensis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.0625, "loss": 4.015366077423096 }, { "id": "identity_179034", "image": "IconQA/images/49140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the smallest triangle has a base of 2 cm, what is the total base length of all triangles?" }, { "from": "gpt", "value": "The answer is 14 cm" } ], "clip_score": 29.0625, "loss": 1.9052050113677979 }, { "id": "identity_69031", "image": "ScienceQA/images/8115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which pair of magnets will attract each other?" }, { "from": "gpt", "value": "The answer is Pair 2" } ], "clip_score": 26.046875, "loss": 1.6657497882843018 }, { "id": "identity_106218", "image": "A-OKVQA/images/334675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.03125, "loss": 3.1544394493103027 }, { "id": "identity_187864", "image": "VQA-AS/images/8814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cat near the pond?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 26.21875, "loss": 4.104207515716553 }, { "id": "identity_136183", "image": "MapQA/images/map_1052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value range of 4336-4768?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 20.125, "loss": 3.819838047027588 }, { "id": "identity_222295", "image": "VQA2.0/images/252759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation is seen in the background?" }, { "from": "gpt", "value": "The answer is Lush greenery, likely tropical" } ], "clip_score": 13.6328125, "loss": 3.707921028137207 }, { "id": "identity_179860", "image": "IconQA/images/40366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of images displayed?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 14.609375, "loss": 1.8678785562515259 }, { "id": "identity_311752", "image": "ChartQA/images/two_col_81054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the most common response?" }, { "from": "gpt", "value": "The answer is A little bit longer" } ], "clip_score": 25.4375, "loss": 2.526636838912964 }, { "id": "identity_320202", "image": "TabMWP/images/19553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Russell has $24. Can he afford a fleece blanket and a yellow bath towel?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.09375, "loss": 3.6837120056152344 }, { "id": "identity_96185", "image": "TabMWP/images/21887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much did the weight of the winning dog increase from 2018 to 2019?" }, { "from": "gpt", "value": "The answer is 1 kg" } ], "clip_score": 39.1875, "loss": 2.237255573272705 }, { "id": "identity_226789", "image": "GeoQA+/images/1510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在▱ABCD中,AC,BD交于点O,OE⊥BD,交AD边于点E,若▱ABCD的周长为20,且AD=5,则△ABE的面积是多少?" }, { "from": "gpt", "value": "The answer is \\( \\frac{25\\sqrt{2}}{4} \\)" } ], "clip_score": 27.109375, "loss": 1.8421838283538818 }, { "id": "identity_145282", "image": "DocVQA/images/kqcg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year was this article originally published in the journal?" }, { "from": "gpt", "value": "The answer is 1959" } ], "clip_score": 22.4375, "loss": 1.8864169120788574 }, { "id": "identity_53091", "image": "ChartQA/images/two_col_40255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total revenue over the five-year period?" }, { "from": "gpt", "value": "The answer is 1345" } ], "clip_score": 21.703125, "loss": 2.628962993621826 }, { "id": "identity_66830", "image": "ScienceQA/images/3537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled in the image?" }, { "from": "gpt", "value": "The answer is Attract pollinators" } ], "clip_score": 22.703125, "loss": 2.925088405609131 }, { "id": "identity_88203", "image": "UniGeo/images/3022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If PA and PB are tangents to circle O at points A and B respectively, and AC is the diameter of circle O, what is the length of PB if PA is 10 units long?" }, { "from": "gpt", "value": "The answer is 10 units" } ], "clip_score": 32.1875, "loss": 1.9226021766662598 }, { "id": "identity_160435", "image": "GeoQA+/images/2108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that angle 1 is 50 degrees and line a is parallel to line b, what is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 31.875, "loss": 1.9386574029922485 }, { "id": "identity_103993", "image": "TextVQA/images/ed25306b6327fe79.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pitcher's uniform?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.09375, "loss": 3.976910352706909 }, { "id": "identity_83139", "image": "Geometry3K/images/1018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of y?" }, { "from": "gpt", "value": "The answer is y = 12.64" } ], "clip_score": 28.765625, "loss": 1.9725699424743652 }, { "id": "identity_262456", "image": "AI2D/images/4621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the ferromagnesian silicate mineral labeled as C from the following options:\n(A) Olivine\n(B) Augite\n(C) Biotite mica\n(D) Hornblende" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.75, "loss": 3.9204964637756348 }, { "id": "identity_73493", "image": "TQA/images/abc_question_images/human_system_digestive_13687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter indicates the organ that is part of the digestive system and primarily responsible for nutrient absorption?" }, { "from": "gpt", "value": "The answer is P" } ], "clip_score": 33.125, "loss": 3.3016743659973145 }, { "id": "identity_310430", "image": "IconQA/images/78608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you pick a marble without looking, how likely is it that you will get a black one?\nChoices:\n(A) certain\n(B) probable\n(C) impossible\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 3.953233003616333 }, { "id": "identity_205088", "image": "A-OKVQA/images/267203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen standing on the platform?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.28125, "loss": 2.3629846572875977 }, { "id": "identity_142464", "image": "DocVQA/images/rshg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the recipient of the letter?" }, { "from": "gpt", "value": "The answer is Dr. Robert E. Shank" } ], "clip_score": 24.796875, "loss": 2.7120468616485596 }, { "id": "identity_265522", "image": "DVQA/images/bar_train_00151139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many categories are there objects that are preferred by fewer than 2 people?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 32.65625, "loss": 3.55915904045105 }, { "id": "identity_334993", "image": "PMC-VQA/images/PMC7756166_rcr2705-fig-0001_325202.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location and characteristic of the lymph node shown in the image?\nChoices:\n(A) Enlarged, calcified paratracheal lymph node.\n(B) Enlarged, non-calcified subcarinal lymph node.\n(C) Enlarged, calcified subcarinal lymph node.\n(D) Enlarged, non-calcified paratracheal lymph node." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.375, "loss": 3.939481258392334 }, { "id": "identity_88386", "image": "UniGeo/images/1265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of arc BC in the given circle if the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is 5.42 units (rounded to two decimal places)" } ], "clip_score": 31.6875, "loss": 1.5578848123550415 }, { "id": "identity_307753", "image": "VQA2.0/images/265967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of gathering is this?" }, { "from": "gpt", "value": "The answer is bike show" } ], "clip_score": 30.390625, "loss": 2.5730209350585938 }, { "id": "identity_17798", "image": "Super-CLEVR/images/superCLEVR_new_000624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the rubber regular bus?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 21.75, "loss": 4.743264198303223 }, { "id": "identity_241608", "image": "VizWiz/images/VizWiz_train_00002564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the hotel and the service offered, and what is the contact number provided on the sign?" }, { "from": "gpt", "value": "The answer is HOTEL AFRODITE, RENT A BICYCLE, 6981772389" } ], "clip_score": 46.625, "loss": 1.5739796161651611 }, { "id": "identity_76315", "image": "AI2D/images/3525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the cell is responsible for breaking down waste materials and cellular debris?" }, { "from": "gpt", "value": "The answer is Lysosome" } ], "clip_score": 25.296875, "loss": 1.3094022274017334 }, { "id": "identity_137567", "image": "FigureQA/images/87393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points does Deep Pink have above 80?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 25.71875, "loss": 2.3583567142486572 }, { "id": "identity_98870", "image": "TabMWP/images/143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Omar buys the two least expensive items, how much money will he have left?" }, { "from": "gpt", "value": "The answer is Omar will have $18 left" } ], "clip_score": 16.640625, "loss": 2.4901375770568848 }, { "id": "identity_18115", "image": "UniGeo/images/2758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在▱ABCD中,∠ABC的平分线与对角线AC交于点E,与CD交于点M,已知BC=2,DM=3,▱ABCD的面积为28,则△ABE的面积为()\nChoices:\n(A) \\frac{28}{3}\n(B) \\frac{21}{2}\n(C) 10\n(D) \\frac{14}{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.140625, "loss": 2.7622995376586914 }, { "id": "identity_52020", "image": "FigureQA/images/56475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have equal sections for each color?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.4375, "loss": 3.382603168487549 }, { "id": "identity_80715", "image": "Geometry3K/images/738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle LMK?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 31.3125, "loss": 2.218097448348999 }, { "id": "identity_225417", "image": "AI2D/images/1832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of algae decreases, which organism's population will be affected the most indirectly, and how will this change propagate through the food web?" }, { "from": "gpt", "value": "The answer is Polar Bear" } ], "clip_score": 31.953125, "loss": 3.757875680923462 }, { "id": "identity_92665", "image": "IconQA/images/55307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one can is added to the end of each row, how many cans will there be in total?" }, { "from": "gpt", "value": "The answer is 66" } ], "clip_score": 26.953125, "loss": 2.402822732925415 }, { "id": "identity_328481", "image": "DocVQA/images/khcn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who sent the email?" }, { "from": "gpt", "value": "The answer is Microsoft Outlook" } ], "clip_score": 24.78125, "loss": 2.806495428085327 }, { "id": "identity_195339", "image": "TextVQA/images/534f4ce2ac0805ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the shadow cast by the jars?" }, { "from": "gpt", "value": "The answer is Rectangular" } ], "clip_score": 21.25, "loss": 3.066347360610962 }, { "id": "identity_274523", "image": "TQA/images/question_images/waves_interactions_interference_8232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of waves nullify each other? \n(A) Non-Harmonic \n(B) Harmonic \n(C) Destructive \n(D) Constructive" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.234375, "loss": 3.9544506072998047 }, { "id": "identity_230842", "image": "ScienceQA/images/7271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the biodiversity in Kaeng Krachan National Park contribute to the stability and resilience of its ecosystem?" }, { "from": "gpt", "value": "The answer is High biodiversity enhances ecosystem stability and resilience" } ], "clip_score": 23.890625, "loss": 2.2541074752807617 }, { "id": "identity_315477", "image": "Super-CLEVR/images/superCLEVR_new_001462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small brown thing on the left side of the rubber object next to the tiny yellow matte object made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 20.109375, "loss": 2.955874443054199 }, { "id": "identity_83591", "image": "UniGeo/images/2576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the rhombus ABCD, if angle BAD is 80 degrees and AE is the perpendicular from A to BD, what is the measure of angle AED?" }, { "from": "gpt", "value": "The answer is 110 degrees" } ], "clip_score": 34.65625, "loss": 1.7559713125228882 }, { "id": "identity_17071", "image": "VQA-RAD/images/synpic43433.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is there dilation of the bowel?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.171875, "loss": 3.147000551223755 }, { "id": "identity_103393", "image": "A-OKVQA/images/120052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the green sign on the ground?" }, { "from": "gpt", "value": "The answer is AID FOOD BANK" } ], "clip_score": 20.28125, "loss": 3.353773593902588 }, { "id": "identity_33669", "image": "PMC-VQA/images/PMC7362644_Fig1_214575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which limbs show a neurogenic pattern? \nChoices:\n(A) Lower limbs \n(B) Upper limbs \n(C) Both limbs \n(D) No particular limb " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.4375, "loss": 4.169529438018799 }, { "id": "identity_223987", "image": "TQA/images/question_images/optics_ray_diagrams_9175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total distance between the object and the image, given that the focal length (f) is the same on both sides of the lens?" }, { "from": "gpt", "value": "The answer is 2f" } ], "clip_score": 31.171875, "loss": 2.0088562965393066 }, { "id": "identity_290305", "image": "DocVQA/images/gljd0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who holds the position of Director of Science and Education?" }, { "from": "gpt", "value": "The answer is Dr. Nyle C. Brady" } ], "clip_score": 18.671875, "loss": 2.419663667678833 }, { "id": "identity_61402", "image": "MapQA/images/map_2313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a picture count within the range of 73359-73487?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 30.234375, "loss": 3.879775285720825 }, { "id": "identity_273040", "image": "ChartQA/images/two_col_81054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What response is the most commonly chosen?" }, { "from": "gpt", "value": "The answer is A little bit longer" } ], "clip_score": 25.03125, "loss": 2.5245113372802734 }, { "id": "identity_181984", "image": "PMC-VQA/images/PMC7460125_diagnostics-10-00534-f002_243447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the images in axial or coronal plane?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 21.390625, "loss": 2.416815996170044 }, { "id": "identity_262524", "image": "PMC-VQA/images/PMC6104906_FIG1_357591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of signal is indicated by the red outline in the image provided?\nChoices:\n(A) Aortic stenosis signal \n(B) Pulmonary embolism signal \n(C) Mitral regurgitation signal \n(D) Mitral stenosis signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.71875, "loss": 4.585793495178223 }, { "id": "identity_30325", "image": "DocVQA/images/ytcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Telephone Number?" }, { "from": "gpt", "value": "The answer is 309/438-2515" } ], "clip_score": 12.09375, "loss": 1.6552705764770508 }, { "id": "identity_56139", "image": "DVQA/images/bar_train_00029252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is most preferred by people?" }, { "from": "gpt", "value": "The answer is beard" } ], "clip_score": 28.5, "loss": 2.5934207439422607 }, { "id": "identity_151496", "image": "ScienceQA/images/2463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the chance that an offspring produced by this cross will have black fur?" }, { "from": "gpt", "value": "The answer is 100%" } ], "clip_score": 26.265625, "loss": 1.544177532196045 }, { "id": "identity_331212", "image": "VQA-AS/images/8482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sun going down?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.46875, "loss": 4.266791343688965 }, { "id": "identity_43028", "image": "PlotQA/images/101658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of years displayed on the X-axis?" }, { "from": "gpt", "value": "The answer is 2002 to 2012" } ], "clip_score": 27.28125, "loss": 1.6045401096343994 }, { "id": "identity_336714", "image": "VizWiz/images/VizWiz_train_00000475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this product?" }, { "from": "gpt", "value": "The answer is shea body butter" } ], "clip_score": 37.15625, "loss": 1.869962453842163 }, { "id": "identity_121841", "image": "VQA-RAD/images/synpic28569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the large organ on the right side of the image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 27.046875, "loss": 3.607182025909424 }, { "id": "identity_188254", "image": "A-OKVQA/images/71603.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the bus service visible in the background?" }, { "from": "gpt", "value": "The answer is Chicago Transit Authority" } ], "clip_score": 24.546875, "loss": 1.891726016998291 }, { "id": "identity_76536", "image": "TQA/images/question_images/cellular_respiration_9041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What molecule is labeled as the starting point for glycolysis in the diagram?" }, { "from": "gpt", "value": "The answer is Glucose" } ], "clip_score": 22.203125, "loss": 1.7476118803024292 }, { "id": "identity_262075", "image": "ScienceQA/images/12749.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Minnesota?\nChoices:\n(A) Olympia\n(B) Minneapolis\n(C) Saint Paul\n(D) Columbus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.625, "loss": 4.181393146514893 }, { "id": "identity_231098", "image": "TQA/images/question_images/hair_follicles_6998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the layers of the skin from the outermost layer to the layer right above the subcutaneous tissue, and what structures can be found within the dermis layer?" }, { "from": "gpt", "value": "The answer is Epidermis, Dermis; Sebaceous Gland, Erector Pili Muscle, Sweat Gland, Nerves, Hair Follicle, Collagen and Elastin Fibres, Artery, Vein" } ], "clip_score": 32.71875, "loss": 0.7743297815322876 }, { "id": "identity_116738", "image": "TextVQA/images/4254b138d56f688c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of military equipment is predominantly featured in the foreground of the image?" }, { "from": "gpt", "value": "The answer is A cannon" } ], "clip_score": 29.078125, "loss": 3.2143819332122803 }, { "id": "identity_36589", "image": "GeoQA+/images/2071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O直径,若∠AOC=140°,则∠D的度数是()\nChoices:\n(A) 20°\n(B) 30°\n(C) 40°\n(D) 70°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.609375, "loss": 2.6552388668060303 }, { "id": "identity_259375", "image": "TextVQA/images/9ee213cca8cb49db.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the baseball team that plays in this North Carolina stadium?" }, { "from": "gpt", "value": "The answer is University of North Carolina Tar Heels" } ], "clip_score": 23.59375, "loss": 1.6786059141159058 }, { "id": "identity_230972", "image": "DocVQA/images/qpvw0217_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the timeline of the manuscript's submission and revision process for the article \"Is There a Connection Between Estrogen and Parkinson's Disease?\"?" }, { "from": "gpt", "value": "The answer is Rejected by Neurology, revised 7/23/01, accepted 1/15/02, published May 2002" } ], "clip_score": 26.265625, "loss": 1.8385447263717651 }, { "id": "identity_137762", "image": "ChartQA/images/6381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage points between the 'Good' and 'Bad' economic situation in 2009?" }, { "from": "gpt", "value": "The answer is 19%" } ], "clip_score": 22.8125, "loss": 2.395115613937378 }, { "id": "identity_66967", "image": "ScienceQA/images/16714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets have the North pole colored in green?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.0625, "loss": 2.239299774169922 }, { "id": "identity_6047", "image": "VQA2.0/images/56048.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could this be overseas?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 13.5234375, "loss": 2.866616725921631 }, { "id": "identity_121830", "image": "VQA2.0/images/283257.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the shirt of the person seated at the far end of the table?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.03125, "loss": 3.6938228607177734 }, { "id": "identity_120489", "image": "Super-CLEVR/images/superCLEVR_new_000669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more cars or two-wheeled vehicles in the image?" }, { "from": "gpt", "value": "The answer is Cars" } ], "clip_score": 21.546875, "loss": 2.5005199909210205 }, { "id": "identity_262376", "image": "ScienceQA/images/9526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bird has a beak adapted for catching fish?\nChoices:\n(A) great blue heron\n(B) indigo bunting" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.53125, "loss": 3.9351208209991455 }, { "id": "identity_144679", "image": "DocVQA/images/fqyl0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people who don't know if they will eat away from home more or less often in the combined sample?" }, { "from": "gpt", "value": "The answer is 2%" } ], "clip_score": 27.125, "loss": 2.1294713020324707 }, { "id": "identity_307989", "image": "A-OKVQA/images/292257.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person closest to the ground using?\nChoices:\n(A) pogo stick\n(B) drill\n(C) machete\n(D) baseball glove" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.09375, "loss": 4.447774887084961 }, { "id": "identity_60841", "image": "FigureQA/images/68483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Dark Gold have the maximum area under the curve?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.703125, "loss": 3.1093170642852783 }, { "id": "identity_41991", "image": "FigureQA/images/39234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have equal sections for Orange and Medium Purple?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 42.25, "loss": 3.3707547187805176 }, { "id": "identity_279017", "image": "TextVQA/images/0a9610643fdb51ba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measurement indicated on the ruler for this piece of material?" }, { "from": "gpt", "value": "The answer is 8 inches " } ], "clip_score": 21.46875, "loss": 3.9186348915100098 }, { "id": "identity_225438", "image": "ScienceQA/images/19563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the concentration of pink particles in each solution, and how many additional pink particles would need to be added to Solution A to make its concentration twice that of Solution B?" }, { "from": "gpt", "value": "The answer is 4 particles" } ], "clip_score": 33.8125, "loss": 4.839267253875732 }, { "id": "identity_15717", "image": "DocVQA/images/gljm0223_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the page no. at bottom of the page?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 19.15625, "loss": 2.6138768196105957 }, { "id": "identity_311743", "image": "ScienceQA/images/15381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which creature's mouth is also adapted for gnawing?\nChoices:\n(A) raccoon\n(B) marmot" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.25, "loss": 3.642503261566162 }, { "id": "identity_332963", "image": "Super-CLEVR/images/superCLEVR_new_001970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the bicycle behind a matte car that is in front of a rubber car that is behind a tiny regular bus?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 26.1875, "loss": 3.5962440967559814 }, { "id": "identity_240115", "image": "PMC-VQA/images/PMC3539948_F2_177557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential implications of a reduction in large myelinated fibers on the overall function of the nervous system?" }, { "from": "gpt", "value": "The answer is Slower signal transmission, muscle weakness, sensory deficits, impaired coordination" } ], "clip_score": 22.78125, "loss": 2.388171434402466 }, { "id": "identity_275787", "image": "VizWiz/images/VizWiz_train_00004485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please inform me about the contents of the label on this bottle?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 25.40625, "loss": 3.234776258468628 }, { "id": "identity_58602", "image": "FigureQA/images/9287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 32.125, "loss": 3.5080208778381348 }, { "id": "identity_22657", "image": "GeoQA+/images/187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙o的直径,⁀{BC}=⁀{CD}=⁀{DE},∠COD=35°,则∠AOE的度数是()\nChoices:\n(A) 65°\n(B) 70°\n(C) 75°\n(D) 85°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.375, "loss": 2.7452714443206787 }, { "id": "identity_93151", "image": "IconQA/images/11949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of sides per shape?" }, { "from": "gpt", "value": "The answer is 7.5" } ], "clip_score": 26.421875, "loss": 2.1262190341949463 }, { "id": "identity_147560", "image": "DocVQA/images/rgvh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total carry-over amount mentioned for the year 1978?" }, { "from": "gpt", "value": "The answer is $23,375" } ], "clip_score": 20.125, "loss": 2.3917665481567383 }, { "id": "identity_305793", "image": "TextVQA/images/a00f8bc5793d21d3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you navigate this?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 15.0, "loss": 4.328258037567139 }, { "id": "identity_226483", "image": "VQA-AS/images/6974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two living beings in the image doing, and how does the environment contribute to their activities?" }, { "from": "gpt", "value": "The answer is Both are sleeping; the warm fireplace creates a cozy atmosphere" } ], "clip_score": 28.5, "loss": 3.392897129058838 }, { "id": "identity_147917", "image": "DocVQA/images/xfhb0228_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the chairman presiding over the Health Education Section?" }, { "from": "gpt", "value": "The answer is Mrs. Lala S. Burnett" } ], "clip_score": 24.09375, "loss": 2.9524757862091064 }, { "id": "identity_77653", "image": "AI2D/images/3977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many radial spines are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 29.84375, "loss": 4.955533027648926 }, { "id": "identity_176767", "image": "IconQA/images/40637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the frame was to be filled with the same shapes without overlapping, how many additional shapes would fit?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.34375, "loss": 2.2185423374176025 }, { "id": "identity_189897", "image": "PMC-VQA/images/PMC4038955_F1_292990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to produce these brain images?" }, { "from": "gpt", "value": "The answer is functional Magnetic Resonance Imaging (fMRI)" } ], "clip_score": 27.375, "loss": 0.934080958366394 }, { "id": "identity_66901", "image": "AI2D/images/4229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is illustrated by the transformation from sedimentary rock to metamorphic rock?" }, { "from": "gpt", "value": "The answer is Metamorphism" } ], "clip_score": 29.734375, "loss": 2.7301487922668457 }, { "id": "identity_44603", "image": "PlotQA/images/3497.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Y-axis labeled as?" }, { "from": "gpt", "value": "The answer is Trade with high income economies(%)" } ], "clip_score": 28.0, "loss": 2.9791295528411865 }, { "id": "identity_280907", "image": "DocVQA/images/rxdg0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who was the letter sent to?" }, { "from": "gpt", "value": "The answer is STATE OF CALIFORNIA" } ], "clip_score": 22.578125, "loss": 1.7933757305145264 }, { "id": "identity_178422", "image": "TabMWP/images/2162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern in the table continues, what would be the y value when x is 14?" }, { "from": "gpt", "value": "The answer is -2" } ], "clip_score": 30.375, "loss": 3.11560320854187 }, { "id": "identity_11344", "image": "PMC-VQA/images/PMC3463786_Fig3_158543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging modalities are used to construct coronary blood flow models? \nChoices:\n(A) CT scan \n(B) PET scan \n(C) X-ray \n(D) MRI" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.8125, "loss": 3.823134660720825 }, { "id": "identity_197066", "image": "TextVQA/images/6200d8c873560e26.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of floral arrangement is visible in the center of the image?" }, { "from": "gpt", "value": "The answer is Bouquet" } ], "clip_score": 11.421875, "loss": 3.5527570247650146 }, { "id": "identity_139705", "image": "DocVQA/images/jzgj0223_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total expenses for the Annual Finance Standalone Dec-09?" }, { "from": "gpt", "value": "The answer is 602.4" } ], "clip_score": 24.65625, "loss": 2.4320359230041504 }, { "id": "identity_152742", "image": "AI2D/images/4045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure depicted in the image?" }, { "from": "gpt", "value": "The answer is Core-Envelope Structure" } ], "clip_score": 37.5, "loss": 1.8098198175430298 }, { "id": "identity_314358", "image": "FigureQA/images/80142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there an intersection between Navy Blue and Medium Periwinkle?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.203125, "loss": 3.909000873565674 }, { "id": "identity_114242", "image": "Super-CLEVR/images/superCLEVR_new_000504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a vehicle in the image that is the same color as the cruiser but not a bus?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.90625, "loss": 3.265869140625 }, { "id": "identity_262079", "image": "FigureQA/images/81937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the most rough?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.03125, "loss": 3.9847636222839355 }, { "id": "identity_274652", "image": "FigureQA/images/75185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Saddle Brown? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.046875, "loss": 4.628968238830566 }, { "id": "identity_212248", "image": "VQA2.0/images/324428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are docked in the marina?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 23.25, "loss": 2.1368205547332764 }, { "id": "identity_222919", "image": "A-OKVQA/images/305170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What university is represented on the bus?" }, { "from": "gpt", "value": "The answer is New York University (NYU)" } ], "clip_score": 33.9375, "loss": 1.673283576965332 }, { "id": "identity_150612", "image": "ScienceQA/images/20893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance between the magnets in Pair 2?" }, { "from": "gpt", "value": "The answer is 0.75 inches" } ], "clip_score": 26.40625, "loss": 1.4381753206253052 }, { "id": "identity_93451", "image": "IconQA/images/66890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this shape a 2D or 3D object?" }, { "from": "gpt", "value": "The answer is 3D (cone)" } ], "clip_score": 34.3125, "loss": 2.066058397293091 }, { "id": "identity_172478", "image": "IconQA/images/1100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the bean grows by 0.25 inches every day, how long will it be after 4 days?" }, { "from": "gpt", "value": "The answer is 6 inches" } ], "clip_score": 34.46875, "loss": 2.4142956733703613 }, { "id": "identity_295669", "image": "IconQA/images/30475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a crab, which picture corresponds to the eighth position in the sequence of pictures provided? Choices are:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.484375, "loss": 3.4531540870666504 }, { "id": "identity_193301", "image": "PMC-VQA/images/PMC5717843_Fig4_249361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the patient in this image?" }, { "from": "gpt", "value": "The answer is Head at top, feet to the left" } ], "clip_score": 28.59375, "loss": 4.174757957458496 }, { "id": "identity_310408", "image": "ScienceQA/images/9924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Maryland\n(B) North Carolina\n(C) Georgia\n(D) South Carolina" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.8125, "loss": 3.9733269214630127 }, { "id": "identity_60902", "image": "DocVQA/images/tsyv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What university is associated with Derrick B. Jelliffe?" }, { "from": "gpt", "value": "The answer is University of California, Los Angeles" } ], "clip_score": 17.03125, "loss": 2.280989646911621 }, { "id": "identity_163794", "image": "Geometry3K/images/2057.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the trapezoid?" }, { "from": "gpt", "value": "The answer is 76 inches" } ], "clip_score": 38.4375, "loss": 2.2441248893737793 }, { "id": "identity_118568", "image": "VizWiz/images/VizWiz_train_00018957.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What flavor is this chocolate bar?" }, { "from": "gpt", "value": "The answer is Milk Chocolate" } ], "clip_score": 20.984375, "loss": 1.750543236732483 }, { "id": "identity_13471", "image": "GeoQA+/images/1900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC中,CD是AB边上的高,若AB=1.5,BC=0.9,AC=1.2,则CD的值是()\nChoices:\n(A) 0.72\n(B) 2.0\n(C) 1.125\n(D) 0" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.671875, "loss": 2.619124174118042 }, { "id": "identity_222638", "image": "A-OKVQA/images/86588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the batter wearing?" }, { "from": "gpt", "value": "The answer is Cleats" } ], "clip_score": 23.34375, "loss": 3.1342849731445312 }, { "id": "identity_21682", "image": "PMC-VQA/images/PMC6599311_Fig1_1339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the coronal view of the ultrasound show? \nChoices:\n(A) Fetal clubfeet \n(B) Fetal urinary bladder \n(C) Distended ureters \n(D) Fetal brain " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.5, "loss": 4.742921829223633 }, { "id": "identity_320982", "image": "DocVQA/images/xjmf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much cholesterol is in the french fries?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 21.21875, "loss": 2.306244373321533 }, { "id": "identity_171549", "image": "IconQA/images/72975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total marbles are there in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.9375, "loss": 2.0108165740966797 }, { "id": "identity_229474", "image": "GeoQA+/images/4016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在矩形ABCD中,对角线AC、BD相交于点O,AE垂直平分BO,若AE=2√{3},求矩形ABCD的面积。" }, { "from": "gpt", "value": "The answer is 48 square cm" } ], "clip_score": 27.84375, "loss": 2.8645708560943604 }, { "id": "identity_256307", "image": "TextVQA/images/859746175448ab2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the second item listed under \"Things that can be exported\" and what is its corresponding column in the table?" }, { "from": "gpt", "value": "The answer is Work, All Editions without May" } ], "clip_score": 15.8515625, "loss": 6.7347092628479 }, { "id": "identity_175607", "image": "IconQA/images/56327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one marble is removed from the image, what is the new probability of selecting a green marble without looking?" }, { "from": "gpt", "value": "The answer is 3/4 or 0.75" } ], "clip_score": 33.28125, "loss": 1.2862958908081055 }, { "id": "identity_128691", "image": "TextVQA/images/0d9ce4258af6514d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language are the majority of the visible newspaper headlines written in?" }, { "from": "gpt", "value": "The answer is Spanish" } ], "clip_score": 29.921875, "loss": 3.494657516479492 }, { "id": "identity_154746", "image": "ScienceQA/images/21123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct mountain peaks can be identified in the background of the image?" }, { "from": "gpt", "value": "The answer is Count the number of peaks visible in the image" } ], "clip_score": 25.03125, "loss": 2.373335123062134 }, { "id": "identity_223537", "image": "TextVQA/images/e416f3bc0e1b6467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees can be identified in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.15625, "loss": 4.428971290588379 }, { "id": "identity_338558", "image": "PMC-VQA/images/PMC6550046_DMM039065F2_489391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does each row in the image indicate?\nChoices:\n(A) Glass brain projection \n(B) Statistical analyses \n(C) Transverse section level \n(D) Coordinates relative to Bregma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5625, "loss": 4.121768951416016 }, { "id": "identity_332942", "image": "PMC-VQA/images/PMC6180898_fig2_380617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which diagnostic imaging techniques were utilized in this scenario?\nChoices:\n(A) X-ray and MRI \n(B) CT and PET-CT \n(C) Ultrasound and PET-CT \n(D) CT and MRI" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.921875, "loss": 3.6582770347595215 }, { "id": "identity_167435", "image": "TabMWP/images/27580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags contain an even number of orange candies?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 33.09375, "loss": 2.5019023418426514 }, { "id": "identity_301319", "image": "GeoQA+/images/5329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given the image, the radius of circle O is 5, and chords AB and CD subtend central angles ∠AOB and ∠COD respectively. If ∠AOB and ∠COD are supplementary and chord CD is 6, what is the length of chord AB?\n\nChoices:\n(A) 6\n(B) 8\n(C) 5√{2}\n(D) 5√{3}" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 38.625, "loss": 2.55876088142395 }, { "id": "identity_252571", "image": "TabMWP/images/1846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total travel time from the waterfront to the shopping district, and how many stops does the train make in between?" }, { "from": "gpt", "value": "The answer is 2 hours 35 minutes, 6 stops" } ], "clip_score": 29.953125, "loss": 2.2505667209625244 }, { "id": "identity_331703", "image": "A-OKVQA/images/89407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the gas tank of the motorcycle where the person is sitting?\nChoices:\n(A) red\n(B) white\n(C) blue\n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.078125, "loss": 4.105014324188232 }, { "id": "identity_271153", "image": "VizWiz/images/VizWiz_train_00003746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what that says?" }, { "from": "gpt", "value": "The answer is llbean" } ], "clip_score": 25.375, "loss": 4.116624355316162 }, { "id": "identity_288523", "image": "PMC-VQA/images/PMC5432911_Fig8_165195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the structure labeled \"d\" in the image. Is it a habit, stamen, bract, or leaf?\nChoices:\n(A) Habit \n(B) Stamen \n(C) Bract \n(D) Leaf" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.796875, "loss": 4.109160423278809 }, { "id": "identity_54853", "image": "DVQA/images/bar_train_00143311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does 'coast' have a consistent value across all three categories?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.8125, "loss": 2.916471481323242 }, { "id": "identity_97597", "image": "IconQA/images/1839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are squares?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.328125, "loss": 2.6066157817840576 }, { "id": "identity_325033", "image": "ScienceQA/images/9155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a mineral?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.640625, "loss": 3.713057279586792 }, { "id": "identity_243819", "image": "IconQA/images/55613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each cube is 2 cm long, what is the total length of the tape in centimeters?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 22.9375, "loss": 1.7533180713653564 }, { "id": "identity_176551", "image": "IconQA/images/86769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the rectangles the same size?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.15625, "loss": 3.2070260047912598 }, { "id": "identity_106816", "image": "Super-CLEVR/images/superCLEVR_new_001919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 20.625, "loss": 2.7542407512664795 }, { "id": "identity_303320", "image": "VQA2.0/images/17472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a busy place?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.3046875, "loss": 2.8743362426757812 }, { "id": "identity_50531", "image": "DVQA/images/bar_train_00097868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of data points representing accuracy for the 'weapon' dataset?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.15625, "loss": 2.369220733642578 }, { "id": "identity_201697", "image": "VizWiz/images/VizWiz_train_00016026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the background object with a white surface?" }, { "from": "gpt", "value": "The answer is Computer mouse" } ], "clip_score": 18.046875, "loss": 4.0621018409729 }, { "id": "identity_180969", "image": "CLEVR-Math/images/CLEVR_train_053305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a matte finish?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.328125, "loss": 2.5484533309936523 }, { "id": "identity_187372", "image": "A-OKVQA/images/236282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is taking place in the center of the image?" }, { "from": "gpt", "value": "The answer is Watching giraffes" } ], "clip_score": 28.703125, "loss": 2.340064525604248 }, { "id": "identity_28777", "image": "TextVQA/images/f441f29812b385ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is a number of anyone in the white jersey's?" }, { "from": "gpt", "value": "The answer is 96" } ], "clip_score": 22.875, "loss": 2.7388923168182373 }, { "id": "identity_161353", "image": "GeoQA+/images/1633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, if AB is 5, what is the perimeter of the rhombus?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 37.21875, "loss": 1.5331251621246338 }, { "id": "identity_258093", "image": "A-OKVQA/images/377589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the sand, and what objects can be seen around them?" }, { "from": "gpt", "value": "The answer is Four people, red bottle, mobile phone, glass of beer, red bag, chairs" } ], "clip_score": 27.859375, "loss": 2.9466664791107178 }, { "id": "identity_287140", "image": "UniGeo/images/57.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given figure, if AB = AC = AD, ∠CBD = 2∠BDC, and ∠BAC = 44°, what is the measure of ∠CAD in degrees?\nChoices:\n(A) 68°\n(B) 88°\n(C) 90°\n(D) 112°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.734375, "loss": 2.378000259399414 }, { "id": "identity_261189", "image": "VQA2.0/images/501540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of animals and trees in the image?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 15.765625, "loss": 2.7961082458496094 }, { "id": "identity_27183", "image": "IconQA/images/66417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a sub. Which picture is second?\nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.171875, "loss": 3.962805986404419 }, { "id": "identity_86426", "image": "Geometry3K/images/1936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diagonal LR in the rhombus LMPQ?" }, { "from": "gpt", "value": "The answer is 11.18" } ], "clip_score": 37.65625, "loss": 2.244385004043579 }, { "id": "identity_92026", "image": "TabMWP/images/550.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students picked topics related to physical sciences (chemistry, electricity, force and motion, light)?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 32.8125, "loss": 2.944540023803711 }, { "id": "identity_241309", "image": "IconQA/images/71236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of the spinner landing on pink, and what is the probability of the spinner landing on white?" }, { "from": "gpt", "value": "The answer is Probability of landing on pink is 0.25, and probability of landing on white is 0.75" } ], "clip_score": 34.09375, "loss": 1.0448758602142334 }, { "id": "identity_283201", "image": "PlotQA/images/23286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the total enrollment in private institutions in 2005 and 2006 higher than the highest enrollment in private institutions in any other year?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.53125, "loss": 3.2595279216766357 }, { "id": "identity_199377", "image": "PMC-VQA/images/PMC4881209_Fig6_34601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of cell abnormality in this image?" }, { "from": "gpt", "value": "The answer is No signs of cell abnormality" } ], "clip_score": 22.390625, "loss": 2.5745935440063477 }, { "id": "identity_45574", "image": "FigureQA/images/81940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors have a value greater than 50 at x-axis label 20?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.875, "loss": 2.1848905086517334 }, { "id": "identity_70765", "image": "TQA/images/question_images/cycle_water_6001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the process indicated by the red arrow moving from the mountains towards the lake?" }, { "from": "gpt", "value": "The answer is Surface runoff" } ], "clip_score": 34.96875, "loss": 2.5605947971343994 }, { "id": "identity_322267", "image": "ScienceQA/images/12469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy moved from () to ().\nChoices:\n(A) the surroundings . . . each cake\n(B) each cake . . . the surroundings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.5625, "loss": 3.81333589553833 }, { "id": "identity_2622", "image": "Super-CLEVR/images/superCLEVR_new_000499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tiny shiny cruiser?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 21.625, "loss": 4.678811550140381 }, { "id": "identity_71548", "image": "ScienceQA/images/18967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the surface the candies are placed on?" }, { "from": "gpt", "value": "The answer is Smooth" } ], "clip_score": 22.8125, "loss": 2.542752981185913 }, { "id": "identity_192058", "image": "PMC-VQA/images/PMC4109997_pone-0103283-g004_308561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is labeled with the letter 'C'?" }, { "from": "gpt", "value": "The answer is Cartilage" } ], "clip_score": 22.0, "loss": 2.724602222442627 }, { "id": "identity_236454", "image": "VQA2.0/images/324037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three key characteristics of the natural environment shown in the image, and how do they support the biodiversity typically found in such environments?" }, { "from": "gpt", "value": "The answer is Dense vegetation, high humidity, canopy layers" } ], "clip_score": 25.140625, "loss": 3.452568292617798 }, { "id": "identity_95194", "image": "CLEVR-Math/images/CLEVR_train_002660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.234375, "loss": 2.4394893646240234 }, { "id": "identity_229489", "image": "VQA2.0/images/458374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary colors visible on the plane, and how are they distributed across different parts of the plane?" }, { "from": "gpt", "value": "The answer is Red on the tail, blue on the tail, white on the fuselage" } ], "clip_score": 28.296875, "loss": 2.496589183807373 }, { "id": "identity_191590", "image": "PMC-VQA/images/PMC6584983_Fig1_496311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of bowel obstruction in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.109375, "loss": 2.945072650909424 }, { "id": "identity_39941", "image": "FigureQA/images/85809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Light Green have the highest starting value?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.609375, "loss": 3.2377536296844482 }, { "id": "identity_65281", "image": "AI2D/images/497.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the food web is a primary consumer and has the least number of predators?" }, { "from": "gpt", "value": "The answer is antelope" } ], "clip_score": 30.65625, "loss": 2.288257122039795 }, { "id": "identity_178988", "image": "IconQA/images/10349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more circles in the top row or in the bottom row?" }, { "from": "gpt", "value": "The answer is Neither, both rows have the same number" } ], "clip_score": 33.84375, "loss": 2.4517862796783447 }, { "id": "identity_279584", "image": "DVQA/images/bar_train_00037129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the level of accuracy of the luxury algorithm in predicting the yield in the dataset?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 34.40625, "loss": 2.9726333618164062 }, { "id": "identity_79922", "image": "GeoQA+/images/3733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ∠CAB = 80° and CC' is parallel to AB, what is the measure of ∠AC'B?" }, { "from": "gpt", "value": "The answer is 130°" } ], "clip_score": 30.265625, "loss": 1.7789287567138672 }, { "id": "identity_73288", "image": "TQA/images/abc_question_images/human_system_muscular_16165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label X indicate?" }, { "from": "gpt", "value": "The answer is Quadriceps" } ], "clip_score": 23.984375, "loss": 2.6145853996276855 }, { "id": "identity_188979", "image": "TextVQA/images/b0dbdaf636807b7c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car parked in front of the \"Printing\" shop?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.265625, "loss": 3.32932710647583 }, { "id": "identity_144308", "image": "DocVQA/images/szwg0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are listed under the Department of Nutritional Sciences, School of Public Health, UCLA?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.015625, "loss": 2.5262746810913086 }, { "id": "identity_316301", "image": "VQA-AS/images/14429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is that in the air?" }, { "from": "gpt", "value": "The answer is bird" } ], "clip_score": 26.296875, "loss": 3.170450448989868 }, { "id": "identity_168098", "image": "TabMWP/images/8349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teachers have a class size less than 30?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.71875, "loss": 2.561511993408203 }, { "id": "identity_193435", "image": "PMC-VQA/images/PMC6297868_F5_412484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the black horizontal line at the bottom of the image representing?" }, { "from": "gpt", "value": "The answer is Basilar membrane" } ], "clip_score": 24.65625, "loss": 3.5315115451812744 }, { "id": "identity_176295", "image": "CLEVR-Math/images/CLEVR_train_060510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither cubes nor cylinders?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.78125, "loss": 2.44773268699646 }, { "id": "identity_327608", "image": "PlotQA/images/112393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of this graph?" }, { "from": "gpt", "value": "The answer is Import and export statistics of goods and services in Saudi Arabia" } ], "clip_score": 40.90625, "loss": 1.280615210533142 }, { "id": "identity_92608", "image": "TabMWP/images/15471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of baseballs hit by the player per day?" }, { "from": "gpt", "value": "The answer is 89.857" } ], "clip_score": 37.78125, "loss": 2.3184216022491455 }, { "id": "identity_291305", "image": "PMC-VQA/images/PMC5797596_F5_271596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From where are the light yellow spherules in the 3D model derived? \nChoices:\n(A) inner membrane \n(B) outer membrane \n(C) nucleus \n(D) cytoplasm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5, "loss": 4.976607799530029 }, { "id": "identity_50275", "image": "FigureQA/images/71396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the highest value at x-axis label 60?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.65625, "loss": 3.285128116607666 }, { "id": "identity_102311", "image": "CLEVR-Math/images/CLEVR_train_003555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of matte objects to shiny objects?" }, { "from": "gpt", "value": "The answer is 2:3" } ], "clip_score": 26.546875, "loss": 1.7346278429031372 }, { "id": "identity_146350", "image": "DocVQA/images/xgvh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the organization as per the document?" }, { "from": "gpt", "value": "The answer is 100 Bergen Street, Newark, New Jersey 07103" } ], "clip_score": 23.515625, "loss": 1.2883707284927368 }, { "id": "identity_214974", "image": "A-OKVQA/images/470984.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any safety signs visible on the slope?" }, { "from": "gpt", "value": "The answer is Yes, there is a yellow sign" } ], "clip_score": 25.828125, "loss": 2.660862922668457 }, { "id": "identity_210152", "image": "VQA2.0/images/70794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of habitat is depicted in the image?" }, { "from": "gpt", "value": "The answer is A temperate rainforest" } ], "clip_score": 26.828125, "loss": 2.912062168121338 }, { "id": "identity_220374", "image": "VQA2.0/images/512292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images feature a person with a camera?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.90625, "loss": 2.671952486038208 }, { "id": "identity_294403", "image": "TextVQA/images/e4c065f9d9fc116e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the beer located at the far right?" }, { "from": "gpt", "value": "The answer is stallhager" } ], "clip_score": 25.140625, "loss": 5.247312068939209 }, { "id": "identity_306940", "image": "CLEVR-Math/images/CLEVR_train_004132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 1 blue metallic cube. How many blue metallic cubes are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.875, "loss": 2.446578025817871 }, { "id": "identity_170190", "image": "TabMWP/images/5238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags had a number of peanuts that is a multiple of 5?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.34375, "loss": 2.3188908100128174 }, { "id": "identity_270902", "image": "UniGeo/images/1922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, given that the line segment AB is 10cm long, M is the midpoint of AB, and point N lies on AB with NB measuring 2cm, what is the length of line segment MN?\nChoices:\n(A) 5cm\n(B) 4cm\n(C) 3cm\n(D) 2cm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.90625, "loss": 2.580766201019287 }, { "id": "identity_17538", "image": "MapQA/images/map_2313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in the MidWest ?" }, { "from": "gpt", "value": "The answer is 73359-73487" } ], "clip_score": 25.9375, "loss": 2.4559433460235596 }, { "id": "identity_154979", "image": "ScienceQA/images/19050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the beaver's fur in the image?" }, { "from": "gpt", "value": "The answer is Dark brown" } ], "clip_score": 30.46875, "loss": 3.6105849742889404 }, { "id": "identity_134502", "image": "DocVQA/images/fmnw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time mentioned in the date of the fax transmission?" }, { "from": "gpt", "value": "The answer is 9:32am" } ], "clip_score": 22.59375, "loss": 2.69170880317688 }, { "id": "identity_322691", "image": "PMC-VQA/images/PMC4288831_F1_349593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which parts of the heart were affected by the air embolism?\nChoices:\n(A) Two parts\n(B) Three parts\n(C) Four parts\n(D) One part" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.78125, "loss": 3.9276325702667236 }, { "id": "identity_219286", "image": "A-OKVQA/images/51328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the fire hydrant predominantly painted with?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 33.03125, "loss": 3.505232334136963 }, { "id": "identity_126838", "image": "A-OKVQA/images/70636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.859375, "loss": 4.416337013244629 }, { "id": "identity_62977", "image": "ScienceQA/images/7112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlanta" } ], "clip_score": 27.6875, "loss": 3.2470741271972656 }, { "id": "identity_289557", "image": "A-OKVQA/images/510806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location does the policeman provide service? \n(A) Bloomington\n(B) Lafayette\n(C) Shreveport\n(D) Monroe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.53125, "loss": 3.9123897552490234 }, { "id": "identity_88853", "image": "GeoQA+/images/2398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle AOP?" }, { "from": "gpt", "value": "The answer is 2r + r * sqrt(7/4)" } ], "clip_score": 30.203125, "loss": 2.580840587615967 }, { "id": "identity_155077", "image": "ScienceQA/images/11702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the south?" }, { "from": "gpt", "value": "The answer is North Carolina" } ], "clip_score": 31.25, "loss": 2.8820207118988037 }, { "id": "identity_300380", "image": "ScienceQA/images/14404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can they trade to each get what they want?\nChoices:\n(A) Martha can trade her broccoli for Belle's oranges.\n(B) Belle can trade her tomatoes for Martha's broccoli.\n(C) Martha can trade her almonds for Belle's tomatoes.\n(D) Belle can trade her tomatoes for Martha's carrots." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.0, "loss": 3.269798994064331 }, { "id": "identity_309222", "image": "MapQA/images/map_5798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have the highest value in the USA?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.734375, "loss": 2.7672781944274902 }, { "id": "identity_23030", "image": "A-OKVQA/images/63974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is likely sold here?\nChoices:\n(A) video games\n(B) action figures\n(C) newspaper\n(D) model airplanes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.828125, "loss": 3.9708733558654785 }, { "id": "identity_183392", "image": "Super-CLEVR/images/superCLEVR_new_000842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that has two wheels and a seat?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.59375, "loss": 4.074550151824951 }, { "id": "identity_213351", "image": "A-OKVQA/images/61498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest the photo was taken?" }, { "from": "gpt", "value": "The answer is Late afternoon or early evening" } ], "clip_score": 19.1875, "loss": 2.84177565574646 }, { "id": "identity_136453", "image": "FigureQA/images/4444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Purple bar value greater than 80?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.546875, "loss": 3.3431313037872314 }, { "id": "identity_84262", "image": "GeoQA+/images/5865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the slope continues at the same angle, how long would it be to reach a vertical height of 400m?" }, { "from": "gpt", "value": "The answer is 666.67m" } ], "clip_score": 34.78125, "loss": 1.6636857986450195 }, { "id": "identity_83356", "image": "UniGeo/images/152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BCD in the quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is 50 degrees" } ], "clip_score": 31.25, "loss": 2.2499351501464844 }, { "id": "identity_63637", "image": "AI2D/images/1461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the labeled feature on the side of the volcano that looks like a smaller volcano?" }, { "from": "gpt", "value": "The answer is Parasitic Cone" } ], "clip_score": 35.5625, "loss": 2.460282802581787 }, { "id": "identity_279090", "image": "Geometry3K/images/1346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 1.\nOptions:\n(A) 31\n(B) 42\n(C) 59\n(D) 63" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.6875, "loss": 3.5727577209472656 }, { "id": "identity_25530", "image": "UniGeo/images/1007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A、B、C是⊙O上的三个点,若∠C=28°,则∠OBA的度数为()\nChoices:\n(A) 28°\n(B) 56°\n(C) 57°\n(D) 62°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.15625, "loss": 2.6436805725097656 }, { "id": "identity_52753", "image": "FigureQA/images/68969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value of Cornflower?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 28.59375, "loss": 2.978498697280884 }, { "id": "identity_207542", "image": "PMC-VQA/images/PMC5265193_Fig4_118654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What lobe of the brain is visible at the top of the image?" }, { "from": "gpt", "value": "The answer is Frontal lobe" } ], "clip_score": 31.515625, "loss": 2.1329457759857178 }, { "id": "identity_58876", "image": "DVQA/images/bar_train_00037822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of values represented in the \"share\" group?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 23.125, "loss": 2.3549704551696777 }, { "id": "identity_87176", "image": "GeoQA+/images/2857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that OC = 2 and ∠A = 30°, what is the length of line segment AC?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.1875, "loss": 2.5442283153533936 }, { "id": "identity_309382", "image": "FigureQA/images/4558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0, "loss": 3.862271547317505 }, { "id": "identity_164650", "image": "Geometry3K/images/2061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle JLK?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.15625, "loss": 1.9080052375793457 }, { "id": "identity_273533", "image": "IconQA/images/96633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Estimate the length of the line using paper clips. The line is approximately (_) paper clips long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.0, "loss": 2.3721039295196533 }, { "id": "identity_131761", "image": "VQA2.0/images/397893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located on the left side of the street?" }, { "from": "gpt", "value": "The answer is Bakery and Pastries shop" } ], "clip_score": 20.453125, "loss": 3.681924819946289 }, { "id": "identity_314230", "image": "VQA2.0/images/303140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is it doing with its trunk?" }, { "from": "gpt", "value": "The answer is eating" } ], "clip_score": 22.078125, "loss": 3.3502357006073 }, { "id": "identity_215600", "image": "A-OKVQA/images/10484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is above the train tracks?" }, { "from": "gpt", "value": "The answer is An overhead metal bridge" } ], "clip_score": 26.296875, "loss": 4.182214260101318 }, { "id": "identity_301710", "image": "Geometry3K/images/1041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle K.\nChoices:\n(A) 62\n(B) 72\n(C) 116\n(D) 160" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.90625, "loss": 2.4818193912506104 }, { "id": "identity_331028", "image": "ScienceQA/images/14436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) yellow\n(B) sticky" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.65625, "loss": 4.192270278930664 }, { "id": "identity_112670", "image": "VQA-RAD/images/synpic55948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 32.34375, "loss": 2.3914740085601807 }, { "id": "identity_319811", "image": "DocVQA/images/npjy0046_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the information code listed?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 20.109375, "loss": 4.312361717224121 }, { "id": "identity_202942", "image": "TextVQA/images/9e49151c28fe87ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much does the ticket cost at the door?" }, { "from": "gpt", "value": "The answer is 57€" } ], "clip_score": 6.9296875, "loss": 2.915938377380371 }, { "id": "identity_215870", "image": "PMC-VQA/images/PMC7600047_cancers-12-02937-f002_286941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of hydrocephalus in the MRI image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 34.71875, "loss": 2.5737504959106445 }, { "id": "identity_192307", "image": "A-OKVQA/images/123535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fencing is used around the field?" }, { "from": "gpt", "value": "The answer is Metal grid fencing" } ], "clip_score": 26.515625, "loss": 3.8058762550354004 }, { "id": "identity_205406", "image": "A-OKVQA/images/111940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many exhaust pipes are visible on the motorcycle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.96875, "loss": 4.428778648376465 }, { "id": "identity_25158", "image": "A-OKVQA/images/297910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal are the people checking out?\nChoices:\n(A) goats\n(B) lambs\n(C) donkeys\n(D) horses" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.078125, "loss": 3.6357314586639404 }, { "id": "identity_158854", "image": "AI2D/images/153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the dashed line circling the Earth represent?" }, { "from": "gpt", "value": "The answer is Boundary of water bulges" } ], "clip_score": 35.96875, "loss": 2.9900901317596436 }, { "id": "identity_285646", "image": "PMC-VQA/images/PMC7001139_FIG3_111193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which abnormalities were detected in the CT scan out of the following options:\n(A) Small intestine dilation\n(B) Increase in renal mass\n(C) Decrease in liver size\n(D) Clear and normal scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.515625, "loss": 3.4317429065704346 }, { "id": "identity_5335", "image": "DocVQA/images/ljhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the document dated?" }, { "from": "gpt", "value": "The answer is Jan. 5, 1955" } ], "clip_score": 25.203125, "loss": 1.788992166519165 }, { "id": "identity_130384", "image": "TextVQA/images/a25cf75ae0134b03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What claim is made about the range of fireworks?" }, { "from": "gpt", "value": "The answer is The largest range of fireworks in the world" } ], "clip_score": 27.40625, "loss": 1.2894680500030518 }, { "id": "identity_125378", "image": "PMC-VQA/images/PMC7931080_clinpract-11-00007-f001_383898.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the mass indicated by the star have well-defined or ill-defined borders?" }, { "from": "gpt", "value": "The answer is The mass has partially well-defined and partially ill-defined borders" } ], "clip_score": 16.21875, "loss": 2.4102509021759033 }, { "id": "identity_221681", "image": "A-OKVQA/images/494434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the woman's handbag?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.4375, "loss": 4.254660606384277 }, { "id": "identity_60547", "image": "DocVQA/images/ykbv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number of the hotel?" }, { "from": "gpt", "value": "The answer is 5 18-06-20" } ], "clip_score": 19.515625, "loss": 2.6562530994415283 }, { "id": "identity_171012", "image": "TabMWP/images/19041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times did the number of employees with health care benefits remain the same from one year to the next?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 36.125, "loss": 2.534698486328125 }, { "id": "identity_113234", "image": "A-OKVQA/images/431494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the blue item the person is sitting on?" }, { "from": "gpt", "value": "The answer is Circular holes in a grid pattern" } ], "clip_score": 20.53125, "loss": 3.2450270652770996 }, { "id": "identity_118201", "image": "VizWiz/images/VizWiz_train_00001742.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the pack of cigarettes on the bar?" }, { "from": "gpt", "value": "The answer is Marlboro" } ], "clip_score": 17.421875, "loss": 2.3745784759521484 }, { "id": "identity_212470", "image": "VQA2.0/images/406710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the canopy above the food stall?" }, { "from": "gpt", "value": "The answer is White with red text" } ], "clip_score": 19.59375, "loss": 4.672133445739746 }, { "id": "identity_153518", "image": "ScienceQA/images/16834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of poles are facing each other in Pair 2 of the magnets?" }, { "from": "gpt", "value": "The answer is North and South poles" } ], "clip_score": 32.65625, "loss": 1.9469159841537476 }, { "id": "identity_216436", "image": "PMC-VQA/images/PMC7274469_pntd.0008141.g002_184814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of a pleural effusion in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.84375, "loss": 3.0831680297851562 }, { "id": "identity_115904", "image": "VQA-AS/images/16599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the soccer ball?" }, { "from": "gpt", "value": "The answer is white and black" } ], "clip_score": 16.453125, "loss": 2.7143356800079346 }, { "id": "identity_175572", "image": "TabMWP/images/16587.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the chest of drawers and the set of bunk beds?" }, { "from": "gpt", "value": "The answer is $0.41" } ], "clip_score": 31.59375, "loss": 2.640342950820923 }, { "id": "identity_285333", "image": "MapQA/images/map_6966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the states with the highest value in the MidWest region?" }, { "from": "gpt", "value": "The answer is South Dakota" } ], "clip_score": 31.453125, "loss": 2.34401273727417 }, { "id": "identity_74928", "image": "AI2D/images/867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organisms are primary consumers in this food chain?" }, { "from": "gpt", "value": "The answer is Rabbit and Grasshopper" } ], "clip_score": 29.015625, "loss": 1.661781668663025 }, { "id": "identity_241919", "image": "TabMWP/images/21062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average rate of change in the weight of the winning dog from 2009 to 2013?" }, { "from": "gpt", "value": "The answer is 0.75 kg per year" } ], "clip_score": 39.46875, "loss": 1.6297632455825806 }, { "id": "identity_162990", "image": "UniGeo/images/1756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the right triangle ABC, with AB=10 cm and AC=6 cm, what is the measure of angle ADE?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 31.046875, "loss": 1.7240324020385742 }, { "id": "identity_142588", "image": "DVQA/images/bar_train_00164356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between the items \"safety\" and \"data\"?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.515625, "loss": 2.3430140018463135 }, { "id": "identity_228870", "image": "IconQA/images/4508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total perimeter of all the rectangles combined?" }, { "from": "gpt", "value": "The answer is 18 units" } ], "clip_score": 31.203125, "loss": 1.9851096868515015 }, { "id": "identity_216496", "image": "PMC-VQA/images/PMC8123265_jcm-10-01861-f003_441269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any sign of orbital fracture in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.46875, "loss": 2.96355938911438 }, { "id": "identity_253311", "image": "VQA2.0/images/256799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many light sources are visible in the image, and how many of them are on?" }, { "from": "gpt", "value": "The answer is three light sources, all of which are on" } ], "clip_score": 20.984375, "loss": 2.070488452911377 }, { "id": "identity_243725", "image": "DocVQA/images/kqcg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pages in the article, and what is the average number of words per page if the article contains 1500 words in total?" }, { "from": "gpt", "value": "The answer is 3 pages, 500 words per page" } ], "clip_score": 10.21875, "loss": 1.5748904943466187 }, { "id": "identity_101620", "image": "IconQA/images/34735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add three more green circles to the second row, how many green circles will there be in total?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 36.1875, "loss": 2.0009777545928955 }, { "id": "identity_113819", "image": "TextVQA/images/6121798239949548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is on the ground where the parade is taking place?" }, { "from": "gpt", "value": "The answer is Multicolored geometric shapes" } ], "clip_score": 18.875, "loss": 3.044664144515991 }, { "id": "identity_153831", "image": "ScienceQA/images/2816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 26.265625, "loss": 2.8061718940734863 }, { "id": "identity_153737", "image": "ScienceQA/images/17024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state that is northeast of Oregon?" }, { "from": "gpt", "value": "The answer is Boise" } ], "clip_score": 25.59375, "loss": 2.7017030715942383 }, { "id": "identity_158910", "image": "ScienceQA/images/4194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of butterfish in this food chain?" }, { "from": "gpt", "value": "The answer is Secondary consumer" } ], "clip_score": 34.625, "loss": 3.9106733798980713 }, { "id": "identity_233303", "image": "GeoQA+/images/2775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,在△ABC中,已知点D,E,F分别为边BC,AD,CE的中点,且S△ABC=4cm²,求△DEF的周长。" }, { "from": "gpt", "value": "The answer is (a + b + c)/2" } ], "clip_score": 25.65625, "loss": 1.865704894065857 }, { "id": "identity_27483", "image": "IconQA/images/19468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares are on the frame?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.09375, "loss": 2.579536199569702 }, { "id": "identity_142523", "image": "DocVQA/images/khcn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who has forwarded the meeting request to additional recipients?" }, { "from": "gpt", "value": "The answer is Kime, Robin" } ], "clip_score": 24.03125, "loss": 5.092155933380127 }, { "id": "identity_166453", "image": "IconQA/images/74273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all triangles identical in shape and size?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.609375, "loss": 3.0901761054992676 }, { "id": "identity_255942", "image": "IconQA/images/1800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dots are there in the largest rectangular section, and what is the total number of dots in the remaining sections?" }, { "from": "gpt", "value": "The answer is 46 dots" } ], "clip_score": 32.53125, "loss": 3.0433413982391357 }, { "id": "identity_286956", "image": "DocVQA/images/rgvh0023_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is the check issued?" }, { "from": "gpt", "value": "The answer is NECSA" } ], "clip_score": 19.984375, "loss": 4.155182838439941 }, { "id": "identity_23826", "image": "DocVQA/images/ymfv0228_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the schedule at the time of 12:30 pm as mentioned in the given page ?" }, { "from": "gpt", "value": "The answer is Lunch" } ], "clip_score": 19.90625, "loss": 2.9962997436523438 }, { "id": "identity_232474", "image": "TextVQA/images/d7481a13ffca5083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company operating the train, and how many workers are visible in the image?" }, { "from": "gpt", "value": "The answer is Virgin, 9" } ], "clip_score": 26.1875, "loss": 3.981903553009033 }, { "id": "identity_21262", "image": "Geometry3K/images/476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the measure of m \\angle 3. Assume that segments that appear tangent are tangent.\nChoices:\n(A) 12\n(B) 20\n(C) 26\n(D) 28" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.34375, "loss": 3.057102918624878 }, { "id": "identity_31309", "image": "GeoQA+/images/4820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在菱形ABCD中,AC与BD相交于点O,BC的垂直平分线EF分别交BC,AC于点E、F,连接DF,若∠BCD=70°,则∠ADF的度数是()\nChoices:\n(A) 60°\n(B) 75°\n(C) 80°\n(D) 110°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.140625, "loss": 2.5735809803009033 }, { "id": "identity_103973", "image": "VQA2.0/images/235233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.65625, "loss": 2.436704397201538 }, { "id": "identity_267852", "image": "VQA2.0/images/374135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter is located on the wing of the plane?" }, { "from": "gpt", "value": "The answer is n" } ], "clip_score": 17.875, "loss": 3.6115944385528564 }, { "id": "identity_221269", "image": "VQA2.0/images/445004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the animals' habitat?" }, { "from": "gpt", "value": "The answer is Sandy or beige" } ], "clip_score": 24.078125, "loss": 3.130502700805664 }, { "id": "identity_5077", "image": "TQA/images/question_images/human_system_reproductory_7021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Urine empties from Urinary bladder, through the Urethra and out which of the following?\nChoices:\n(A) External urethral orifice\n(B) Hymen\n(C) Clitoris\n(D) Mons pubis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.21875, "loss": 4.201573371887207 }, { "id": "identity_247238", "image": "TextVQA/images/0cd50fe4a0c2b85b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two brands advertised on the banners visible in the image, and what is the primary color of each banner?" }, { "from": "gpt", "value": "The answer is New Era (black), Modell's Sporting Goods (red)" } ], "clip_score": 20.390625, "loss": 1.7963674068450928 }, { "id": "identity_108729", "image": "TextVQA/images/caa954ccc2fee453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any images of animals on the page?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 11.515625, "loss": 2.984616756439209 }, { "id": "identity_121050", "image": "TextVQA/images/047fc464e23ef72e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many makeup brushes are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.546875, "loss": 2.414992570877075 }, { "id": "identity_321639", "image": "IconQA/images/40599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) turn\n(B) slide\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.078125, "loss": 3.9593403339385986 }, { "id": "identity_195532", "image": "PMC-VQA/images/PMC7725490_FIG2_317205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the ventricles in the image?" }, { "from": "gpt", "value": "The answer is A measurement tool is needed for an accurate answer" } ], "clip_score": 27.34375, "loss": 3.8494772911071777 }, { "id": "identity_294958", "image": "DocVQA/images/fncf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the essential factor in effectively managing multiple disease states?" }, { "from": "gpt", "value": "The answer is Nutritional care" } ], "clip_score": 16.296875, "loss": 3.67421817779541 }, { "id": "identity_204170", "image": "A-OKVQA/images/1700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the snow on the ground?" }, { "from": "gpt", "value": "The answer is Packed and slightly rough" } ], "clip_score": 24.546875, "loss": 4.12500524520874 }, { "id": "identity_40165", "image": "FigureQA/images/13186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Dark Orchid in the pie chart?" }, { "from": "gpt", "value": "The answer is 25%" } ], "clip_score": 41.96875, "loss": 1.6284430027008057 }, { "id": "identity_324635", "image": "PMC-VQA/images/PMC4861721_F5_29095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in image (D)?\nChoices:\n(A) A seedling with no leaves.\n(B) A seedling with two overlapped leaves.\n(C) A seedling with two separate leaves.\n(D) A seedling with one leaf slightly overlapped by a nearby seedling leaf." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.546875, "loss": 3.195988178253174 }, { "id": "identity_329530", "image": "PlotQA/images/8562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the legend located in the graph?" }, { "from": "gpt", "value": "The answer is bottom center" } ], "clip_score": 17.828125, "loss": 4.228368759155273 }, { "id": "identity_202462", "image": "A-OKVQA/images/570601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players from the batting team are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.578125, "loss": 4.4687724113464355 }, { "id": "identity_95314", "image": "IconQA/images/74387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many marbles are in the third row from the top?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.625, "loss": 2.4046194553375244 }, { "id": "identity_77737", "image": "ScienceQA/images/18530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bird predominantly?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 27.984375, "loss": 3.8217177391052246 }, { "id": "identity_44861", "image": "DocVQA/images/hjcw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the estimated budget for the year 2000 alone?" }, { "from": "gpt", "value": "The answer is $250,000" } ], "clip_score": 23.265625, "loss": 1.7113877534866333 }, { "id": "identity_96814", "image": "IconQA/images/53692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove one die, what is the new total number of dots?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 31.203125, "loss": 2.311162233352661 }, { "id": "identity_317144", "image": "Geometry3K/images/575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle G.\nChoices:\n(A) 93\n(B) 97\n(C) 106\n(D) 116" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.34375, "loss": 2.496859312057495 }, { "id": "identity_135234", "image": "ChartQA/images/8028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest percentage of 'Yes' opinion among the given statements?" }, { "from": "gpt", "value": "The answer is 68%" } ], "clip_score": 13.3984375, "loss": 2.004260540008545 }, { "id": "identity_301517", "image": "TabMWP/images/4553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a candy factory, butterscotch candies were packaged into bags of different sizes. What is the smallest number of candies?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 28.109375, "loss": 2.73510479927063 }, { "id": "identity_17552", "image": "DVQA/images/bar_train_00003456.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units of the the most sold item were sold?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 25.46875, "loss": 2.374079942703247 }, { "id": "identity_27626", "image": "VQA-AS/images/17084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the dog sitting on the bench?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.640625, "loss": 3.7992019653320312 }, { "id": "identity_308096", "image": "TextVQA/images/e271d16f9a60e06e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address number of this place?" }, { "from": "gpt", "value": "The answer is 57" } ], "clip_score": 13.03125, "loss": 2.268882989883423 }, { "id": "identity_225247", "image": "IconQA/images/31537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many kites are there in each row and column, and what is the total number of kites?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.5, "loss": 3.6261634826660156 }, { "id": "identity_285427", "image": "PMC-VQA/images/PMC3751478_F3_226867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of image was utilized to confirm the atrophy of the vermis and the cerebellar hemispheres?\nOptions:\n(A) PET image\n(B) CT scan image\n(C) T2-weighted image\n(D) MRI image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.8125, "loss": 4.602694511413574 }, { "id": "identity_68075", "image": "ScienceQA/images/16072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tubes are connected to the top of the container?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.34375, "loss": 4.256516456604004 }, { "id": "identity_29080", "image": "DocVQA/images/kzbn0226_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Page Number?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 14.109375, "loss": 3.3159844875335693 }, { "id": "identity_311058", "image": "GEOS/images/016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram at the right, the segments shown are tangent to the circle. Find the value of something.\nChoices:\n(A) 5\n(B) 6\n(C) 7\n(D) 9" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.71875, "loss": 2.6850368976593018 }, { "id": "identity_301013", "image": "DocVQA/images/fxhw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the document?" }, { "from": "gpt", "value": "The answer is October 9, 1984" } ], "clip_score": 26.921875, "loss": 1.4045758247375488 }, { "id": "identity_117703", "image": "VizWiz/images/VizWiz_train_00015927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What company produces this rum?" }, { "from": "gpt", "value": "The answer is Fred L Myers & Son" } ], "clip_score": 31.71875, "loss": 3.28558611869812 }, { "id": "identity_63973", "image": "ScienceQA/images/17734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the device shown in the image likely to be manually operated or autonomous?" }, { "from": "gpt", "value": "The answer is Likely to be autonomous" } ], "clip_score": 19.734375, "loss": 2.1395668983459473 }, { "id": "identity_227037", "image": "Super-CLEVR/images/superCLEVR_new_000665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are made of the same material as the big cyan object, and what are their shapes?" }, { "from": "gpt", "value": "The answer is Three objects: bicycle, dirtbike, motorcycle" } ], "clip_score": 30.421875, "loss": 2.7146763801574707 }, { "id": "identity_64180", "image": "AI2D/images/3665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"Partition between chambers\"?" }, { "from": "gpt", "value": "The answer is It separates the left and right sides of the heart" } ], "clip_score": 34.625, "loss": 1.8029732704162598 }, { "id": "identity_168817", "image": "IconQA/images/27887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are both green and squares?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.8125, "loss": 2.380235195159912 }, { "id": "identity_56758", "image": "DocVQA/images/ghgw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is the luncheon scheduled?" }, { "from": "gpt", "value": "The answer is 12 noon" } ], "clip_score": 19.71875, "loss": 1.8218220472335815 }, { "id": "identity_284181", "image": "A-OKVQA/images/171192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the woman in the red apron engaged in?\nChoices:\n(A) bartending\n(B) wine demo\n(C) waiting tables\n(D) party catering" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.703125, "loss": 4.566100597381592 }, { "id": "identity_222989", "image": "PMC-VQA/images/PMC8124336_ijms-22-04818-f003_441960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is labeled with \"CT\" in the image?" }, { "from": "gpt", "value": "The answer is Connective tissue" } ], "clip_score": 28.203125, "loss": 2.662612199783325 }, { "id": "identity_75095", "image": "TQA/images/question_images/human_system_digestive_1392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the flap that covers the trachea during swallowing?" }, { "from": "gpt", "value": "The answer is Epiglottis" } ], "clip_score": 27.765625, "loss": 1.5560252666473389 }, { "id": "identity_113667", "image": "TextVQA/images/9a51e9e16197a20d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much RAM does the laptop have?" }, { "from": "gpt", "value": "The answer is 256 MB of RAM" } ], "clip_score": 25.890625, "loss": 1.6011486053466797 }, { "id": "identity_229505", "image": "ScienceQA/images/5131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony is directly south of the colony shown and what is its name?" }, { "from": "gpt", "value": "The answer is Massachusetts" } ], "clip_score": 31.40625, "loss": 3.4655959606170654 }, { "id": "identity_337764", "image": "ScienceQA/images/1576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What question can Erin's experiment best address?\nChoices:\n(A) Do slugs eat more from tomato leaves or broccoli leaves?\n(B) Do slugs weigh more after eating tomato leaves or broccoli leaves?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.5625, "loss": 3.398876190185547 }, { "id": "identity_114371", "image": "VizWiz/images/VizWiz_train_00009512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the leaves on the quilt?" }, { "from": "gpt", "value": "The answer is Oval-shaped leaves" } ], "clip_score": 29.234375, "loss": 2.794670820236206 }, { "id": "identity_68268", "image": "AI2D/images/1935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many herbivores are present in the food web?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.546875, "loss": 2.3807899951934814 }, { "id": "identity_94820", "image": "TabMWP/images/31808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the rate of change for the function represented by the table?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 27.875, "loss": 2.5989465713500977 }, { "id": "identity_78626", "image": "ScienceQA/images/3939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of zooplankton are shown in this food web?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.09375, "loss": 2.11806583404541 }, { "id": "identity_19964", "image": "IconQA/images/2120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long." }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 34.0625, "loss": 2.523648977279663 }, { "id": "identity_115550", "image": "VQA2.0/images/231524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What message is written on the back of the person in the center?" }, { "from": "gpt", "value": "The answer is STOP CLIMATE CHAOS" } ], "clip_score": 16.984375, "loss": 1.6343144178390503 }, { "id": "identity_159323", "image": "GeoQA+/images/147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the circle O?" }, { "from": "gpt", "value": "The answer is 25π" } ], "clip_score": 35.8125, "loss": 2.2955946922302246 }, { "id": "identity_86079", "image": "GeoQA+/images/1631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ABCD is a parallelogram with perimeter 20cm, and AB is not equal to AD, if AB = 7cm, what is the length of AD?" }, { "from": "gpt", "value": "The answer is 3cm" } ], "clip_score": 30.171875, "loss": 2.068610668182373 }, { "id": "identity_278781", "image": "AI2D/images/413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this cycle, how many stages are there?\nChoices:\n(A) 4\n(B) 3\n(C) 5\n(D) 2" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 2.9951138496398926 }, { "id": "identity_313751", "image": "IconQA/images/86204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many of these are there?\nChoices:\n(A) 4\n(B) 1\n(C) 5\n(D) 3\n(E) 2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.10546875, "loss": 3.216041088104248 }, { "id": "identity_62715", "image": "ScienceQA/images/14119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state located at the southwestern corner of the map?" }, { "from": "gpt", "value": "The answer is California" } ], "clip_score": 26.890625, "loss": 2.7636520862579346 }, { "id": "identity_86357", "image": "GeoQA+/images/3651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ACB in 图2 after the trash can handle AB has been rotated around point C?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 29.953125, "loss": 1.6202585697174072 }, { "id": "identity_178305", "image": "IconQA/images/30712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of fish facing left to the fish facing right?" }, { "from": "gpt", "value": "The answer is 2:3" } ], "clip_score": 28.484375, "loss": 1.6271111965179443 }, { "id": "identity_244725", "image": "UniGeo/images/1326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,过⊙O外一点P引⊙O的两条切线PA、PB,切点分别是A、B,OP交⊙O于点C,点D是⁀{AEB}上不与点A、点C重合的一个动点,连接AD、CD,若∠APB=60°,求∠ADC与∠AOD的和是多少度?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 25.265625, "loss": 1.4051398038864136 }, { "id": "identity_326934", "image": "TextVQA/images/01ab6b284642d087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the airline on the plane?" }, { "from": "gpt", "value": "The answer is united" } ], "clip_score": 27.484375, "loss": 3.330211639404297 }, { "id": "identity_322828", "image": "ChartQA/images/OECD_GROSS_DOMESTIC_SPENDING_ON_R&D_FRA_GRC_IRL_000058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year is the middle line at its highest point?" }, { "from": "gpt", "value": "The answer is 1996" } ], "clip_score": 17.890625, "loss": 2.1239092350006104 }, { "id": "identity_224948", "image": "IconQA/images/32991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each paper clip is 2 cm long, what is the length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 29.171875, "loss": 2.162134885787964 }, { "id": "identity_203524", "image": "VizWiz/images/VizWiz_train_00003715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pills are missing from the blister pack?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.640625, "loss": 2.263819694519043 }, { "id": "identity_52055", "image": "ChartQA/images/8312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of U.S. adults who have not too much or no confidence in the news media?" }, { "from": "gpt", "value": "The answer is 61%" } ], "clip_score": 33.4375, "loss": 1.753395915031433 }, { "id": "identity_303078", "image": "PMC-VQA/images/PMC7077121_FIG3_136239.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the procedure reveal?\nChoices:\n(A) Enlarged right ventricle\n(B) Blockage in left coronary artery\n(C) Occlusive thrombus in right coronary artery\n(D) No abnormalities found." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.203125, "loss": 4.292984962463379 }, { "id": "identity_293713", "image": "DocVQA/images/ymlg0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the employee number for Irene E Karl?" }, { "from": "gpt", "value": "The answer is 483378" } ], "clip_score": 19.890625, "loss": 2.6265647411346436 }, { "id": "identity_304445", "image": "ScienceQA/images/5665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown here?\nChoices:\n(A) Virginia\n(B) Wisconsin\n(C) New Hampshire\n(D) North Carolina" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.71875, "loss": 3.7907326221466064 }, { "id": "identity_96190", "image": "TabMWP/images/9683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the most expensive and least expensive tickets?" }, { "from": "gpt", "value": "The answer is $31.47" } ], "clip_score": 31.140625, "loss": 2.405036449432373 }, { "id": "identity_13652", "image": "ScienceQA/images/21173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can Liz and Pedro trade to each get what they want?\nChoices:\n(A) Pedro can trade his broccoli for Liz's oranges.\n(B) Liz can trade her tomatoes for Pedro's carrots.\n(C) Pedro can trade his almonds for Liz's tomatoes.\n(D) Liz can trade her tomatoes for Pedro's broccoli." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.734375, "loss": 2.7135651111602783 }, { "id": "identity_329785", "image": "GeoQA+/images/3678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,∠ABC的平分线与AC的垂直平分线相交于一点,过该点作垂线分别垂直于BC和AB,垂足分别为F、G.若BG=4,AC=5,则△ABC的周长是()\n\nChoices:\n(A) 12\n(B) 13\n(C) 14\n(D) 15" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 2.3399150371551514 }, { "id": "identity_270740", "image": "TabMWP/images/31948.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of stickers in the collections of the friends who compared their sticker collections?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 30.84375, "loss": 2.384723663330078 }, { "id": "identity_23163", "image": "A-OKVQA/images/437221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: World skate is the head controller of which sport?\nChoices:\n(A) surfing\n(B) swimming\n(C) skiing\n(D) skating" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.09375, "loss": 3.6586978435516357 }, { "id": "identity_296367", "image": "PMC-VQA/images/PMC6612391_fig1_5014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI was utilized for comparison during diagnosis?\nOptions:\n(A) T1 with gadolinium\n(B) T2\n(C) FLAIR\n(D) Diffusion-weighted" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.328125, "loss": 3.82531476020813 }, { "id": "identity_103368", "image": "PMC-VQA/images/PMC9172626_goac023-F1_303318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 24.53125, "loss": 4.031675815582275 }, { "id": "identity_200334", "image": "A-OKVQA/images/334511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the shirt of the person sitting on the left side of the image?" }, { "from": "gpt", "value": "The answer is Blue and white striped" } ], "clip_score": 16.46875, "loss": 3.4071168899536133 }, { "id": "identity_132798", "image": "TextVQA/images/5803b2404bf9470b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light showing?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.9375, "loss": 2.809727668762207 }, { "id": "identity_5029", "image": "UniGeo/images/1566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知直线a∥b∥c,直线m、n与直线a、b、c分别交于点A、C、E、B、D、F,AC=4,CE=6,BD=3,则BF=()\nChoices:\n(A) 7\n(B) 7.5\n(C) 8\n(D) 8.5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.34375, "loss": 2.6161692142486572 }, { "id": "identity_53548", "image": "FigureQA/images/85092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color line has the lowest starting value at x-axis label 0?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 28.859375, "loss": 3.401352643966675 }, { "id": "identity_185329", "image": "PMC-VQA/images/PMC3384927_fig1_143048.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the arrow in the image?" }, { "from": "gpt", "value": "The answer is Meniscus" } ], "clip_score": 34.25, "loss": 3.745452880859375 }, { "id": "identity_306404", "image": "DocVQA/images/szjp0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the American Academy of Pediatrics located?" }, { "from": "gpt", "value": "The answer is Chicago, Illinois" } ], "clip_score": 22.234375, "loss": 2.6887900829315186 }, { "id": "identity_214362", "image": "VizWiz/images/VizWiz_train_00014188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of table surface is the tray resting on?" }, { "from": "gpt", "value": "The answer is Wooden" } ], "clip_score": 16.453125, "loss": 3.1028382778167725 }, { "id": "identity_121569", "image": "A-OKVQA/images/383937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Late afternoon or early evening" } ], "clip_score": 17.203125, "loss": 2.8179614543914795 }, { "id": "identity_4207", "image": "PMC-VQA/images/PMC2895667_ppat-1000980-g007_68025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region of the brain is shown in the image? \nChoices:\n(A) Cerebellum \n(B) Hippocampus \n(C) Amygdala \n(D) Thalamus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.296875, "loss": 3.7743887901306152 }, { "id": "identity_64952", "image": "ScienceQA/images/14827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly north of the highlighted state?" }, { "from": "gpt", "value": "The answer is Indiana" } ], "clip_score": 29.5, "loss": 3.031278610229492 }, { "id": "identity_128530", "image": "A-OKVQA/images/567142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of watercraft is visible in the background to the right of the blue kayak?" }, { "from": "gpt", "value": "The answer is Motorboat or yacht" } ], "clip_score": 28.765625, "loss": 2.7919459342956543 }, { "id": "identity_5443", "image": "TQA/images/question_images/nuclear_energy_7098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of energy does the plant produce?\nChoices:\n(A) Turbine\n(B) Electricity\n(C) Pump\n(D) Gas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.578125, "loss": 4.072079658508301 }, { "id": "identity_150724", "image": "AI2D/images/118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the Earth's path around the Sun as shown in the image?" }, { "from": "gpt", "value": "The answer is Orbit" } ], "clip_score": 24.484375, "loss": 2.650585412979126 }, { "id": "identity_47393", "image": "PlotQA/images/93112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the graph?" }, { "from": "gpt", "value": "The answer is Undisbursed external debt from private creditors" } ], "clip_score": 38.84375, "loss": 1.4480351209640503 }, { "id": "identity_211727", "image": "TextVQA/images/431c9a0556e0133f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is the text on the envelope written in besides English?" }, { "from": "gpt", "value": "The answer is French" } ], "clip_score": 17.9375, "loss": 3.579676389694214 }, { "id": "identity_32980", "image": "GeoQA+/images/5106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,BC是⊙O的直径,AB是⊙O的弦,若∠AOC=60°,则∠OAB的度数是()\nChoices:\n(A) 20°\n(B) 25°\n(C) 30°\n(D) 35°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.421875, "loss": 2.4764363765716553 }, { "id": "identity_338648", "image": "A-OKVQA/images/94625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: These walls have a similar color scheme to which place?\nChoices:\n(A) nathan's\n(B) arthur treacher's\n(C) popeye's\n(D) kfc" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.5, "loss": 3.606809139251709 }, { "id": "identity_193702", "image": "VQA2.0/images/90983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible field markings are there on the grass?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.421875, "loss": 4.000095367431641 }, { "id": "identity_108928", "image": "PMC-VQA/images/PMC3488525_F3_163820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of liver cirrhosis in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.40625, "loss": 2.8585212230682373 }, { "id": "identity_178751", "image": "TabMWP/images/34165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pieces of fruit picked from all trees?" }, { "from": "gpt", "value": "The answer is 267" } ], "clip_score": 34.28125, "loss": 2.106998920440674 }, { "id": "identity_159834", "image": "UniGeo/images/200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABC' in the image?" }, { "from": "gpt", "value": "The answer is 113 degrees" } ], "clip_score": 34.78125, "loss": 2.525852680206299 }, { "id": "identity_278693", "image": "VQA-RAD/images/synpic33422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of hilar enlargement?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.8359375, "loss": 2.7067017555236816 }, { "id": "identity_53733", "image": "FigureQA/images/56463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the Dark Magenta and Light Coral bars?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.90625, "loss": 2.7815821170806885 }, { "id": "identity_222109", "image": "VQA2.0/images/43664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pizza slices are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.390625, "loss": 4.3195695877075195 }, { "id": "identity_160537", "image": "Geometry3K/images/783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of sector AGF in circle G if the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is 65.45 square units" } ], "clip_score": 37.09375, "loss": 2.168292284011841 }, { "id": "identity_287826", "image": "PMC-VQA/images/PMC4896236_Fig1_38560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the red color highlighting in the image?\nChoices:\n(A) 16mer chains \n(B) B chains \n(C) A chains \n(D) 24mer chains" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.375, "loss": 4.0203399658203125 }, { "id": "identity_220037", "image": "PMC-VQA/images/PMC7672003_F3_300026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the different colors used in the image?" }, { "from": "gpt", "value": "The answer is To distinguish anatomical parts of the brain" } ], "clip_score": 29.671875, "loss": 3.4638142585754395 }, { "id": "identity_253913", "image": "VQA2.0/images/262550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators that the person is not riding the motorcycle, and what is the person doing instead?" }, { "from": "gpt", "value": "The answer is The person is posing with the motorcycle" } ], "clip_score": 29.765625, "loss": 2.9357218742370605 }, { "id": "identity_158976", "image": "UniGeo/images/702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ACD in the given circle diagram?" }, { "from": "gpt", "value": "The answer is 65 degrees" } ], "clip_score": 34.84375, "loss": 2.2274222373962402 }, { "id": "identity_303212", "image": "PMC-VQA/images/PMC4391185_Fig3_376517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture these images?\nChoices:\n(A) X-ray \n(B) Magnetic Resonance Imaging (MRI) \n(C) Computed Tomography (CT) \n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.609375, "loss": 4.203488349914551 }, { "id": "identity_34746", "image": "A-OKVQA/images/482671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of cuisine is being served?\nChoices:\n(A) chinese\n(B) korean\n(C) indian\n(D) japanese" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.28125, "loss": 3.979126453399658 }, { "id": "identity_146646", "image": "DocVQA/images/ymyg0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Director, Research Division" } ], "clip_score": 20.53125, "loss": 3.6151509284973145 }, { "id": "identity_145576", "image": "DocVQA/images/nnng0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of the transmission as mentioned in the report?" }, { "from": "gpt", "value": "The answer is 1'07" } ], "clip_score": 25.3125, "loss": 3.6734652519226074 }, { "id": "identity_126748", "image": "A-OKVQA/images/489845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the carpet the kids are sitting on?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.3125, "loss": 3.445932388305664 }, { "id": "identity_189078", "image": "TextVQA/images/f9dbb82f8e6e1058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many monitors are visible on the desk in the center of the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.796875, "loss": 4.26761531829834 }, { "id": "identity_247320", "image": "VQA2.0/images/80187.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the count of players visible on the field, including the batter, catcher, umpire, and any other players in the background?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 29.359375, "loss": 3.2158589363098145 }, { "id": "identity_319456", "image": "DocVQA/images/njnp0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of this artwork by André Masson?" }, { "from": "gpt", "value": "The answer is $250" } ], "clip_score": 28.359375, "loss": 2.634622097015381 }, { "id": "identity_43400", "image": "ChartQA/images/75460323001338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average public health expenditure of Zambia between 2000 and 2010?" }, { "from": "gpt", "value": "The answer is Approximately 2.5%" } ], "clip_score": 26.671875, "loss": 2.134470224380493 }, { "id": "identity_317853", "image": "TextVQA/images/8991e02929fa39fd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this book?" }, { "from": "gpt", "value": "The answer is the complete poetry of john milton" } ], "clip_score": 37.625, "loss": 2.2150161266326904 }, { "id": "identity_49673", "image": "DVQA/images/bar_train_00133588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of people preferring the 'size' object between the 'cloth' and 'guitar' categories?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.03125, "loss": 2.6328601837158203 }, { "id": "identity_103557", "image": "PMC-VQA/images/PMC3548636_F3_179881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiopaque structure at the center top of the image?" }, { "from": "gpt", "value": "The answer is Nasal septum" } ], "clip_score": 30.3125, "loss": 2.7287325859069824 }, { "id": "identity_318652", "image": "ScienceQA/images/6807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) New Zealand\n(B) Australia\n(C) Papua New Guinea\n(D) the Marshall Islands" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.875, "loss": 3.786607265472412 }, { "id": "identity_277538", "image": "TextVQA/images/1217257695405075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two letters are displayed in white on the green bus?" }, { "from": "gpt", "value": "The answer is go" } ], "clip_score": 31.890625, "loss": 3.3640828132629395 }, { "id": "identity_251", "image": "TQA/images/abc_question_images/cycle_nitrogen_16710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is leaked into the air from a volcano?\nChoices:\n(A) AMMONIA\n(B) UPTAKE BY PLANTS\n(C) NITRATES\n(D) LIGHTNING" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.484375, "loss": 4.438973426818848 }, { "id": "identity_278505", "image": "DVQA/images/bar_train_00018714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units of the item lesson that were sold in all the stores combined?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 26.828125, "loss": 2.412824869155884 }, { "id": "identity_238757", "image": "VQA-RAD/images/synpic18651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the radiographic features that indicate cardiomegaly and pulmonary edema, and how do these features correlate with the clinical symptoms of heart failure?" }, { "from": "gpt", "value": "The answer is Cardiomegaly with pulmonary edema indicates heart failure symptoms" } ], "clip_score": 30.203125, "loss": 2.602719783782959 }, { "id": "identity_210956", "image": "A-OKVQA/images/470174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pet enclosure in the room?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.90625, "loss": 3.1868863105773926 }, { "id": "identity_153427", "image": "ScienceQA/images/21036.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Colorado" } ], "clip_score": 34.09375, "loss": 3.156785011291504 }, { "id": "identity_260039", "image": "IconQA/images/101802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the rectangle and the perimeter of a square with the same area?" }, { "from": "gpt", "value": "The answer is \\( 4\\sqrt{2} \\, \\text{cm} \\)" } ], "clip_score": 31.734375, "loss": 2.318830728530884 }, { "id": "identity_142187", "image": "DVQA/images/bar_train_00115125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm labeled \"mold\"?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.9375, "loss": 2.7107937335968018 }, { "id": "identity_261146", "image": "VQA2.0/images/238178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding motorbikes in the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 25.65625, "loss": 2.4371187686920166 }, { "id": "identity_184591", "image": "PMC-VQA/images/PMC7935200_FIG2_385034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of kidney stones in this image?" }, { "from": "gpt", "value": "The answer is No evidence of kidney stones" } ], "clip_score": 32.3125, "loss": 2.9705376625061035 }, { "id": "identity_65389", "image": "TQA/images/abc_question_images/human_system_respiratory_12882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'X' indicate in the diagram?" }, { "from": "gpt", "value": "The answer is Bronchus" } ], "clip_score": 23.171875, "loss": 3.0548362731933594 }, { "id": "identity_110293", "image": "PMC-VQA/images/PMC7829776_viruses-13-00122-f002_351278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any white blood cells be identified in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.25, "loss": 2.701861619949341 }, { "id": "identity_299856", "image": "PMC-VQA/images/PMC3513975_fig1_170285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the red arrows shown in the images?\nChoices:\n(A) Blood flow \n(B) Normal brain tissue \n(C) Tumors \n(D) Inflammatory lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.65625, "loss": 4.392589569091797 }, { "id": "identity_290648", "image": "A-OKVQA/images/312902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of medical assistance can you receive at the clinic on the right?\nChoices:\n(A) eyes\n(B) feet\n(C) teeth\n(D) back" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 6.73828125, "loss": 4.076236724853516 }, { "id": "identity_134344", "image": "ChartQA/images/multi_col_60878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of respondents who either don't know or have no opinion?" }, { "from": "gpt", "value": "The answer is 18%" } ], "clip_score": 29.734375, "loss": 2.0492823123931885 }, { "id": "identity_116673", "image": "VQA2.0/images/406188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible on the platform?" }, { "from": "gpt", "value": "The answer is Yes, there are people" } ], "clip_score": 18.0, "loss": 3.121697425842285 }, { "id": "identity_109618", "image": "A-OKVQA/images/47481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the kitchen countertop?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.84375, "loss": 3.9158406257629395 }, { "id": "identity_17374", "image": "DocVQA/images/yrvw0217_65.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first title in the document?" }, { "from": "gpt", "value": "The answer is Optimal Positioning" } ], "clip_score": 26.546875, "loss": 2.8888509273529053 }, { "id": "identity_45150", "image": "FigureQA/images/25530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Blue section represent more than a quarter of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 33.4375, "loss": 3.006865978240967 }, { "id": "identity_148068", "image": "FigureQA/images/60400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for the color Black?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 21.6875, "loss": 3.073532819747925 }, { "id": "identity_58381", "image": "MapQA/images/map_10817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any state with a value below 98.5?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.40625, "loss": 2.933521270751953 }, { "id": "identity_101231", "image": "IconQA/images/30095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the grid squares covered by the letter 'Q' is no longer covered after it transforms into 'O'?" }, { "from": "gpt", "value": "The answer is 25%" } ], "clip_score": 32.28125, "loss": 1.4008772373199463 }, { "id": "identity_172686", "image": "TabMWP/images/6533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of Web pages visited by the three people?" }, { "from": "gpt", "value": "The answer is 3,066.33" } ], "clip_score": 35.21875, "loss": 1.8578122854232788 }, { "id": "identity_317956", "image": "FigureQA/images/76791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.078125, "loss": 4.490767002105713 }, { "id": "identity_59187", "image": "DocVQA/images/xybx0223_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the company's plant located at Guldhar?" }, { "from": "gpt", "value": "The answer is Delhi- Meerut Road, Guldhar, Ghaziabad - 201001" } ], "clip_score": 24.890625, "loss": 1.4321424961090088 }, { "id": "identity_265418", "image": "IconQA/images/9502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of birds present?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.296875, "loss": 2.1311721801757812 }, { "id": "identity_159487", "image": "GeoQA+/images/3618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BCF?" }, { "from": "gpt", "value": "The answer is less than 90 degrees" } ], "clip_score": 32.09375, "loss": 2.030703067779541 }, { "id": "identity_139812", "image": "DocVQA/images/fspw0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Brand Code mentioned in the form?" }, { "from": "gpt", "value": "The answer is M120M" } ], "clip_score": 19.90625, "loss": 2.9476728439331055 }, { "id": "identity_108560", "image": "A-OKVQA/images/31923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of weather condition is depicted in the image?" }, { "from": "gpt", "value": "The answer is Rainy" } ], "clip_score": 22.375, "loss": 3.4045026302337646 }, { "id": "identity_321536", "image": "A-OKVQA/images/519784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the ground near the vehicle?\nChoices:\n(A) arrow\n(B) footprints\n(C) coyote\n(D) snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 4.98046875, "loss": 4.269210338592529 }, { "id": "identity_169831", "image": "IconQA/images/80620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If David arrived at the circus at 2:00 PM, how long has he been at the circus?" }, { "from": "gpt", "value": "The answer is 1 hour and 30 minutes" } ], "clip_score": 22.5625, "loss": 1.318250298500061 }, { "id": "identity_91080", "image": "GeoQA+/images/5540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the right triangle ABC, with AB = 5 and AC = 4, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is arctan(3/4)" } ], "clip_score": 30.78125, "loss": 1.9714983701705933 }, { "id": "identity_152338", "image": "ScienceQA/images/20808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in volume between the largest gas planet and the smallest ice planet according to the given data?" }, { "from": "gpt", "value": "The answer is 1,368,750 billion km^3" } ], "clip_score": 30.96875, "loss": 1.8160711526870728 }, { "id": "identity_77007", "image": "ScienceQA/images/19568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a territory is not \"independent,\" what might its governance structure look like based on the definitions given?" }, { "from": "gpt", "value": "The answer is Controlled by an external authority" } ], "clip_score": 28.453125, "loss": 3.268937587738037 }, { "id": "identity_59745", "image": "DVQA/images/bar_train_00137713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items have sold exactly 4 units in the 'stores' category?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.625, "loss": 2.2787139415740967 }, { "id": "identity_310621", "image": "TQA/images/question_images/skin_cross_section_7579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, which part is the outermost layer of the skin?\nChoices:\n(A) hypodermis\n(B) pore\n(C) epidermis\n(D) dermis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.25, "loss": 4.028644561767578 }, { "id": "identity_5103", "image": "UniGeo/images/3076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点A,B,C在⊙O上,∠AOB=72°,则∠ACB的度数是()\nChoices:\n(A) 18°\n(B) 36°\n(C) 54°\n(D) 72°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.390625, "loss": 2.523381471633911 }, { "id": "identity_2143", "image": "ScienceQA/images/252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Emilia's test show?\nChoices:\n(A) whether producing more insulin would help the bacteria grow faster\n(B) whether different types of bacteria would need different nutrients to produce insulin\n(C) whether she added enough nutrients to help the bacteria produce 20% more insulin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.53125, "loss": 3.614976167678833 }, { "id": "identity_232679", "image": "CLEVR-Math/images/CLEVR_train_013355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red objects are left after removing all blocks?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.5625, "loss": 2.251499891281128 }, { "id": "identity_269483", "image": "TextVQA/images/7615b7d8ed4ebd3c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which word is displayed on the watch?" }, { "from": "gpt", "value": "The answer is apps" } ], "clip_score": 27.578125, "loss": 4.104753017425537 }, { "id": "identity_242614", "image": "GeoQA+/images/5440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, A, B, C是⊙O上一点, 四边形ABCD是平行四边形, CD与⊙O相切, AD与⊙O交于点E, ∠D=70°, 求∠BAE的大小。" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 27.265625, "loss": 1.817227840423584 }, { "id": "identity_303433", "image": "TQA/images/question_images/radioactive_decay_8181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many components result from the beta decay of a neutron?\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 3" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.078125, "loss": 2.975072145462036 }, { "id": "identity_279107", "image": "IconQA/images/33597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of dots?\nChoices:\n(A) 8\n(B) 9\n(C) 1\n(D) 4\n(E) 7" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.4375, "loss": 3.3516883850097656 }, { "id": "identity_222224", "image": "VQA-AS/images/8164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the painting above the fireplace?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 27.390625, "loss": 3.2606265544891357 }, { "id": "identity_68096", "image": "ScienceQA/images/1480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Early morning or late afternoon" } ], "clip_score": 14.1484375, "loss": 3.0893373489379883 }, { "id": "identity_256249", "image": "TextVQA/images/2633f60ac1bc0ea9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different sponsors have signs visible in the image, and what are their names?" }, { "from": "gpt", "value": "The answer is Six sponsors: Continental, Carlsberg, Kia, JVC, Toto, Svenska Spel" } ], "clip_score": 27.578125, "loss": 2.3361804485321045 }, { "id": "identity_169492", "image": "TabMWP/images/21639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the yellow T-shirt and the long-sleeved shirt?" }, { "from": "gpt", "value": "The answer is $2.00" } ], "clip_score": 28.953125, "loss": 1.8277440071105957 }, { "id": "identity_207539", "image": "TextVQA/images/594a3d5d06f173d9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which matchbook has a non-English text on it?" }, { "from": "gpt", "value": "The answer is The second matchbook from the left in the top row" } ], "clip_score": 29.671875, "loss": 2.328739881515503 }, { "id": "identity_307222", "image": "DVQA/images/bar_train_00122254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of bars are there?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 22.984375, "loss": 3.734952449798584 }, { "id": "identity_191425", "image": "PMC-VQA/images/PMC5240324_Fig11_115021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the background of the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 21.875, "loss": 3.394296646118164 }, { "id": "identity_237163", "image": "TextVQA/images/66902c557d2aa2fe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the intersection and direction indicated by the signs?" }, { "from": "gpt", "value": "The answer is Cherry St 1800 S and Highway Patrol, right direction" } ], "clip_score": 38.0, "loss": 1.8661006689071655 }, { "id": "identity_112246", "image": "Super-CLEVR/images/superCLEVR_new_001682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more blue objects or green objects in the image?" }, { "from": "gpt", "value": "The answer is Green objects" } ], "clip_score": 22.125, "loss": 2.6133110523223877 }, { "id": "identity_327066", "image": "PMC-VQA/images/PMC8780928_btm210261-fig-0004_173014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image?\nChoices:\n(A) ADSC isolation \n(B) ADSC proliferation \n(C) ADSC osteogenic differentiation \n(D) ADSC adipogenic differentiation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.453125, "loss": 4.184042930603027 }, { "id": "identity_283959", "image": "A-OKVQA/images/333434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which language is displayed on these signs? Is it:\n(A) Spanish\n(B) Braille\n(C) Finnish\n(D) Asian" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.4765625, "loss": 3.6397674083709717 }, { "id": "identity_216640", "image": "PMC-VQA/images/PMC4020501_fig2_288340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of invasion into adjacent structures by the cystic mass?" }, { "from": "gpt", "value": "The answer is No evidence of invasion into adjacent structures" } ], "clip_score": 25.265625, "loss": 2.516752004623413 }, { "id": "identity_207598", "image": "A-OKVQA/images/526801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are on the top shelf above the desk?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 24.453125, "loss": 4.423121452331543 }, { "id": "identity_97119", "image": "IconQA/images/20107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows of circles are there?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.53125, "loss": 2.406754732131958 }, { "id": "identity_260447", "image": "PMC-VQA/images/PMC3535132_Fig1_176628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristics of a comminuted fracture, and how do they differ from the characteristics of a greenstick fracture, an oblique fracture, and a spiral fracture as seen in the provided X-ray image?" }, { "from": "gpt", "value": "The answer is The X-ray shows multiple bone fragments, indicating a comminuted fracture" } ], "clip_score": 32.0625, "loss": 2.240562915802002 }, { "id": "identity_238052", "image": "FigureQA/images/52478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the area of the Crimson section to the area of the Dark Gold section?" }, { "from": "gpt", "value": "The answer is 3:2" } ], "clip_score": 32.78125, "loss": 1.5705797672271729 }, { "id": "identity_331060", "image": "A-OKVQA/images/267203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of transportation is shown here?\nChoices:\n(A) ferry\n(B) plane\n(C) car\n(D) rail" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6015625, "loss": 4.185782432556152 }, { "id": "identity_97606", "image": "TabMWP/images/9802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which restaurant had the highest revenue from cheese pizza sales?" }, { "from": "gpt", "value": "The answer is New York Slice" } ], "clip_score": 37.4375, "loss": 2.2413671016693115 }, { "id": "identity_134898", "image": "DocVQA/images/lmjf0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Assistant manager mentioned in the document?" }, { "from": "gpt", "value": "The answer is Ms. Wakamatsu" } ], "clip_score": 22.390625, "loss": 2.4675774574279785 }, { "id": "identity_320273", "image": "VQA2.0/images/132878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this going?" }, { "from": "gpt", "value": "The answer is nowhere" } ], "clip_score": 15.9609375, "loss": 3.0315639972686768 }, { "id": "identity_93774", "image": "IconQA/images/71391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick an orange one?" }, { "from": "gpt", "value": "The answer is 5/5 or 1" } ], "clip_score": 36.28125, "loss": 2.5230913162231445 }, { "id": "identity_14485", "image": "TextVQA/images/4e50a5c4e9f9bcee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the second box honoring?" }, { "from": "gpt", "value": "The answer is elvis" } ], "clip_score": 20.15625, "loss": 3.8348348140716553 }, { "id": "identity_329620", "image": "VQA2.0/images/348621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the roofs of the buildings in the distance?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 20.921875, "loss": 3.949268341064453 }, { "id": "identity_293524", "image": "VQA2.0/images/259104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this device a phone?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 20.6875, "loss": 3.6875762939453125 }, { "id": "identity_75599", "image": "AI2D/images/3698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many salivary glands are labeled in this diagram?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.1875, "loss": 2.4599788188934326 }, { "id": "identity_27956", "image": "PMC-VQA/images/PMC6694554_Fig1_23746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located in the CT scan?\nChoices:\n(A) Right supraclavicular fossa \n(B) Left supraclavicular fossa \n(C) Subclavian fossa \n(D) Inferior clavicle fossa " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.78125, "loss": 4.6607160568237305 }, { "id": "identity_156857", "image": "TQA/images/question_images/parts_worm_7299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label for the first segment of the earthworm's body?" }, { "from": "gpt", "value": "The answer is Prostomium" } ], "clip_score": 32.1875, "loss": 2.3101775646209717 }, { "id": "identity_271324", "image": "TextVQA/images/5bd58184d9d8a6cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which year is the imprint found on this coin?" }, { "from": "gpt", "value": "The answer is 2001" } ], "clip_score": 29.0, "loss": 1.8458020687103271 }, { "id": "identity_284180", "image": "UniGeo/images/1228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and D lie on circle O, with angle A measuring 20°. Line BC is a tangent to circle O at point B, and the extension of line OD intersects BC at point C. What is the measure of angle OCB in degrees?\nChoices:\n(A) 20°\n(B) 40°\n(C) 50°\n(D) 80°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.1875, "loss": 2.4890732765197754 }, { "id": "identity_91897", "image": "IconQA/images/49113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more orange dot is added to an empty square, what will be the new ratio of orange dots to empty squares?" }, { "from": "gpt", "value": "The answer is 4:6 or 2:3" } ], "clip_score": 39.40625, "loss": 1.6234984397888184 }, { "id": "identity_189920", "image": "A-OKVQA/images/515660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the green item next to the bread?" }, { "from": "gpt", "value": "The answer is A lime" } ], "clip_score": 13.2578125, "loss": 4.2234416007995605 }, { "id": "identity_165268", "image": "Geometry3K/images/403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the polygons ABCDE and PQRST are similar, what is the length of side ST?" }, { "from": "gpt", "value": "The answer is 32/3 or 10.67" } ], "clip_score": 35.71875, "loss": 1.7378745079040527 }, { "id": "identity_173596", "image": "TabMWP/images/2816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more apple trees are there in Ross Park compared to Sawyer Park?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 30.78125, "loss": 2.270589828491211 }, { "id": "identity_237443", "image": "Geometry3K/images/354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of side y and the area of the triangle" }, { "from": "gpt", "value": "The answer is \\( y = 7\\sqrt{3} \\) and the area is \\( 24.5\\sqrt{3} \\)" } ], "clip_score": 33.0, "loss": 1.8488588333129883 }, { "id": "identity_306613", "image": "Geometry3K/images/1924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the area of this regular polygon. Round to the nearest tenth.\nChoices:\n(A) 27.7\n(B) 55.4\n(C) 166.3\n(D) 332.6" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.96875, "loss": 2.729717969894409 }, { "id": "identity_236044", "image": "A-OKVQA/images/475889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in preparing the meat for this group, and what equipment is used in each step?" }, { "from": "gpt", "value": "The answer is Grill, table, bowls, and utensils" } ], "clip_score": 21.390625, "loss": 2.6260180473327637 }, { "id": "identity_275035", "image": "DVQA/images/bar_train_00097399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there stacked bars in the chart?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 25.328125, "loss": 3.738006114959717 }, { "id": "identity_8103", "image": "DVQA/images/bar_train_00063166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.65625, "loss": 4.292116641998291 }, { "id": "identity_61248", "image": "DVQA/images/bar_train_00073548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference percentage between 'charge' and 'cost' in the 'behalf' category?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 24.390625, "loss": 2.9280307292938232 }, { "id": "identity_337830", "image": "TabMWP/images/2663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Alan just missed the 12.45 P.M. bus at the famous bridge. When is the next bus?\nChoices:\n(A) 1:30 P.M.\n(B) 10:45 A.M.\n(C) 2:15 P.M.\n(D) 12:45 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.59375, "loss": 3.3491294384002686 }, { "id": "identity_111152", "image": "Super-CLEVR/images/superCLEVR_new_000567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the blue motorbike?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.953125, "loss": 2.274106979370117 }, { "id": "identity_230719", "image": "DocVQA/images/pkbg0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days old was Johnathan Evan on August 10th, 1998?" }, { "from": "gpt", "value": "The answer is 3 days" } ], "clip_score": 20.328125, "loss": 2.1119301319122314 }, { "id": "identity_301261", "image": "DocVQA/images/grhd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the educational qualification of this person?" }, { "from": "gpt", "value": "The answer is M. D., 1964 Univ. of Missouri" } ], "clip_score": 22.453125, "loss": 2.4770395755767822 }, { "id": "identity_223591", "image": "VizWiz/images/VizWiz_train_00019600.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cup in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 25.90625, "loss": 3.638894557952881 }, { "id": "identity_158179", "image": "ScienceQA/images/11554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the poles in the image?" }, { "from": "gpt", "value": "The answer is North-South-South-North" } ], "clip_score": 20.140625, "loss": 2.2605783939361572 }, { "id": "identity_45954", "image": "FigureQA/images/73803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any overlap between the data points of Web Gray and Medium Turquoise on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.03125, "loss": 2.767609119415283 }, { "id": "identity_34993", "image": "PMC-VQA/images/PMC7698836_pathogens-09-00959-f003_309243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of contrast is seen within the soft tissue mass in the MRI image? \nChoices:\n(A) No contrast \n(B) Low contrast \n(C) High contrast \n(D) Incomplete contrast. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.3125, "loss": 3.756239652633667 }, { "id": "identity_250142", "image": "TabMWP/images/32551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does Brett need to buy a French horn, a saxophone, and a viola compared to buying just a clarinet?" }, { "from": "gpt", "value": "The answer is 1,219" } ], "clip_score": 38.3125, "loss": 2.522465944290161 }, { "id": "identity_68541", "image": "ScienceQA/images/13244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total items are listed in Gwen's want list?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.859375, "loss": 2.1156837940216064 }, { "id": "identity_130058", "image": "VQA-AS/images/4461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vegetation around the pond?" }, { "from": "gpt", "value": "The answer is Yes, there is a small plant" } ], "clip_score": 32.5, "loss": 2.8918745517730713 }, { "id": "identity_127015", "image": "A-OKVQA/images/478407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of salad is in the clear container in front of the man?" }, { "from": "gpt", "value": "The answer is Garden salad" } ], "clip_score": 22.625, "loss": 3.3092637062072754 }, { "id": "identity_251342", "image": "ScienceQA/images/19810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain the process of how the force applied by the hammer results in the nail being driven into the wood, including the type of force involved and the interaction between the hammer, nail, and wood" }, { "from": "gpt", "value": "The answer is Push force drives the nail into the wood" } ], "clip_score": 32.28125, "loss": 3.44199538230896 }, { "id": "identity_57403", "image": "DVQA/images/bar_train_00038944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm represented by the red bars on the 'time' dataset?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.65625, "loss": 3.3104076385498047 }, { "id": "identity_202217", "image": "A-OKVQA/images/367362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the river's surface?" }, { "from": "gpt", "value": "The answer is Rippled" } ], "clip_score": 16.40625, "loss": 3.490692377090454 }, { "id": "identity_79807", "image": "UniGeo/images/42.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle ABC is 70 degrees, what is the measure of angle CAD?" }, { "from": "gpt", "value": "The answer is 20 degrees" } ], "clip_score": 35.09375, "loss": 1.7646679878234863 }, { "id": "identity_215389", "image": "Super-CLEVR/images/superCLEVR_new_000348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are present in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.359375, "loss": 2.4164981842041016 }, { "id": "identity_315972", "image": "DVQA/images/bar_train_00000151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest accuracy reported in this chart?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.296875, "loss": 3.264923334121704 }, { "id": "identity_295729", "image": "TabMWP/images/30456.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: With $1.50, can Adriana afford to purchase both a hair tie and a lip balm?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.234375, "loss": 3.3233494758605957 }, { "id": "identity_296430", "image": "TQA/images/question_images/velocity_time_graphs_8210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this graphic, what is represented by the red line?\nChoices:\n(A) acceleration\n(B) deceleration\n(C) increasing speed\n(D) steady speed" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.265625, "loss": 4.484753131866455 }, { "id": "identity_151521", "image": "AI2D/images/3882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are stipules and where are they located?" }, { "from": "gpt", "value": "The answer is Appendages at the base of leaf stalk" } ], "clip_score": 34.65625, "loss": 2.9021811485290527 }, { "id": "identity_272430", "image": "GeoQA+/images/1848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, it is known that AB is parallel to CD, AD intersects BC at point P, AB = 4, CD = 7, PD = 10, then the length of AP is equal to which of the following?\nChoices:\n(A) 40/11\n(B) 70/4\n(C) 70/11\n(D) 40/7" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.578125, "loss": 2.6830525398254395 }, { "id": "identity_39376", "image": "FigureQA/images/9808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the pie chart does Dark Gold represent?" }, { "from": "gpt", "value": "The answer is approximately 25%" } ], "clip_score": 39.21875, "loss": 2.03507924079895 }, { "id": "identity_248615", "image": "A-OKVQA/images/32773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary purpose of the safety feature on the front of the bus, and how does it contribute to overall road safety?" }, { "from": "gpt", "value": "The answer is Improve visibility and prevent collisions" } ], "clip_score": 19.046875, "loss": 3.0907881259918213 }, { "id": "identity_309210", "image": "Super-CLEVR/images/superCLEVR_new_000116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the size of the red shiny truck the same as the cyan aircraft?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.625, "loss": 5.1523356437683105 }, { "id": "identity_31297", "image": "ChartQA/images/two_col_43449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of highest two values?" }, { "from": "gpt", "value": "The answer is 55.6" } ], "clip_score": 16.78125, "loss": 2.6346957683563232 }, { "id": "identity_117235", "image": "Super-CLEVR/images/superCLEVR_new_001568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicles are present in the image?" }, { "from": "gpt", "value": "The answer is Bicycle, car, motorcycle, planes" } ], "clip_score": 27.6875, "loss": 2.416022300720215 }, { "id": "identity_259479", "image": "GeoQA+/images/2013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在矩形ABCD中,点E是AD上任一点,连接CE,F是CE的中点,若△BFC的面积为6,且AD的长度是AB的两倍,则矩形ABCD的周长是多少?" }, { "from": "gpt", "value": "The answer is \\( 12\\sqrt{3} \\)" } ], "clip_score": 27.1875, "loss": 2.292598009109497 }, { "id": "identity_99821", "image": "IconQA/images/61699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line was drawn on a piece of paper that is 21 centimeters long, what fraction of the paper's length does the line occupy?" }, { "from": "gpt", "value": "The answer is 17/52 or approximately 0.327 (rounded to three decimal places)" } ], "clip_score": 29.09375, "loss": 1.4633299112319946 }, { "id": "identity_119702", "image": "TextVQA/images/01023678b3f9c6bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the book the boy is excited about at the end of the comic?" }, { "from": "gpt", "value": "The answer is Space travel" } ], "clip_score": 24.03125, "loss": 3.188168525695801 }, { "id": "identity_251005", "image": "ChartQA/images/OECD_PRIMARY_ENERGY_SUPPLY_CHN_NAM_000004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the sum value of Namibia and China and the value of Namibia multiplied by 3?" }, { "from": "gpt", "value": "The answer is 0.001" } ], "clip_score": 26.40625, "loss": 2.1204638481140137 }, { "id": "identity_95968", "image": "IconQA/images/14364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area covered by the letter Q after the transformation in square units?" }, { "from": "gpt", "value": "The answer is The area covered by the letter Q is 12 square units" } ], "clip_score": 28.03125, "loss": 1.6591707468032837 }, { "id": "identity_215963", "image": "A-OKVQA/images/10321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the horse's mane?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 22.890625, "loss": 3.679868221282959 }, { "id": "identity_228955", "image": "A-OKVQA/images/560083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the items these men are displaying in relation to the aircraft behind them?" }, { "from": "gpt", "value": "The answer is They are qualified pilots for the aircraft behind them" } ], "clip_score": 25.765625, "loss": 3.256047487258911 }, { "id": "identity_56541", "image": "PlotQA/images/48318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Ethiopia have the lowest balance of payments for transport services as a percentage of commercial service imports?" }, { "from": "gpt", "value": "The answer is 2005" } ], "clip_score": 34.6875, "loss": 1.6046795845031738 }, { "id": "identity_218533", "image": "PMC-VQA/images/PMC3605910_pone-0059740-g006_193981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter is labeled in the top left corner of this image?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.71875, "loss": 2.7163150310516357 }, { "id": "identity_218452", "image": "A-OKVQA/images/186095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the reins that the person is holding?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.890625, "loss": 2.9734084606170654 }, { "id": "identity_62747", "image": "AI2D/images/1841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are cows in this diagram classified as herbivores or carnivores?" }, { "from": "gpt", "value": "The answer is Herbivores" } ], "clip_score": 28.0, "loss": 1.7529748678207397 }, { "id": "identity_320248", "image": "PMC-VQA/images/PMC6211433_Fig2_389281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain regions are impacted by the signal abnormalities?\n\nChoices:\n(A) Occipital lobes \n(B) Frontal lobes \n(C) Internal capsule, basal ganglia, thalami, hypothalamus, and cerebral peduncles \n(D) Cerebellum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.046875, "loss": 3.803377866744995 }, { "id": "identity_308354", "image": "DVQA/images/bar_train_00019732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the largest individual bar in this chart?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 25.6875, "loss": 2.5903401374816895 }, { "id": "identity_62893", "image": "AI2D/images/4115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is shown in the image as occurring within the mitochondrion?" }, { "from": "gpt", "value": "The answer is cellular respiration" } ], "clip_score": 32.40625, "loss": 2.284520387649536 }, { "id": "identity_80363", "image": "Geometry3K/images/1102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If MN is parallel to PQ, and the length of segment MQ is 12, what is the length of segment PT?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 31.984375, "loss": 2.5937914848327637 }, { "id": "identity_58962", "image": "FigureQA/images/15873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Gray section in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 33%" } ], "clip_score": 33.1875, "loss": 2.279583692550659 }, { "id": "identity_208540", "image": "PMC-VQA/images/PMC3100814_F0002_96654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the circular structure in the center of the image?" }, { "from": "gpt", "value": "The answer is It is a cross-section of a human limb, likely the upper arm, showing the humerus bone" } ], "clip_score": 28.328125, "loss": 2.2342727184295654 }, { "id": "identity_240050", "image": "VizWiz/images/VizWiz_train_00003264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unread notifications are there in total on this iPhone screen?" }, { "from": "gpt", "value": "The answer is 234" } ], "clip_score": 27.953125, "loss": 2.1889567375183105 }, { "id": "identity_55957", "image": "DocVQA/images/kshk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the event for which the reservations are made?" }, { "from": "gpt", "value": "The answer is January 30, 1969" } ], "clip_score": 21.328125, "loss": 1.8348190784454346 }, { "id": "identity_77133", "image": "ScienceQA/images/4546.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state is to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 33.90625, "loss": 2.8733367919921875 }, { "id": "identity_25485", "image": "VQA-AS/images/379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the animal in the chair?" }, { "from": "gpt", "value": "The answer is cat" } ], "clip_score": 25.390625, "loss": 3.350743055343628 }, { "id": "identity_35349", "image": "VQA-AS/images/15124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the girl watching TV?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.6875, "loss": 3.872765302658081 }, { "id": "identity_48160", "image": "FigureQA/images/17828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of the Khaki section in the pie chart?" }, { "from": "gpt", "value": "The answer is 16.67%" } ], "clip_score": 36.34375, "loss": 1.3556338548660278 }, { "id": "identity_170670", "image": "IconQA/images/33311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ribbon on the present?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 32.03125, "loss": 2.652679443359375 }, { "id": "identity_321400", "image": "TabMWP/images/27298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Romero has a collection of marbles of various sizes and colors. What is the probability that a randomly chosen marble is small and red? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 5/11" } ], "clip_score": 27.0625, "loss": 1.9961848258972168 }, { "id": "identity_303815", "image": "PMC-VQA/images/PMC9053249_Fig1_268446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used for the scaffold in this image?\nChoices:\n(A) silk\n(B) PCLA\n(C) PLA\n(D) PGA" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.0390625, "loss": 4.623950004577637 }, { "id": "identity_125823", "image": "PMC-VQA/images/PMC3766644_F4_230375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is shown in this image?" }, { "from": "gpt", "value": "The answer is Pelvic region" } ], "clip_score": 29.890625, "loss": 3.4231796264648438 }, { "id": "identity_42777", "image": "FigureQA/images/69098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value of the Gold line on the graph?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 26.734375, "loss": 2.8083512783050537 }, { "id": "identity_264806", "image": "FigureQA/images/50627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.15625, "loss": 3.74849009513855 }, { "id": "identity_236205", "image": "IconQA/images/66073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the number of circles to the number of squares, and what is the simplified form of this ratio?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 29.953125, "loss": 2.3597583770751953 }, { "id": "identity_87995", "image": "GeoQA+/images/3126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠ACO = 45°, what is the measure of ∠BAC?" }, { "from": "gpt", "value": "The answer is 67.5°" } ], "clip_score": 36.8125, "loss": 1.329035758972168 }, { "id": "identity_268864", "image": "TextVQA/images/c6efa7ccba50d7e5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sign is displayed in this image?" }, { "from": "gpt", "value": "The answer is stop" } ], "clip_score": 26.953125, "loss": 4.157596588134766 }, { "id": "identity_131157", "image": "TextVQA/images/962a527efa5c5d17.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the product according to the label?" }, { "from": "gpt", "value": "The answer is Use on any American food or mice" } ], "clip_score": 18.734375, "loss": 5.637294292449951 }, { "id": "identity_25836", "image": "GeoQA+/images/4047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知点A,B,C依次在⊙O上,∠B﹣∠A=40°,则∠AOB的度数为()\nChoices:\n(A) 70°\n(B) 72°\n(C) 80°\n(D) 84°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.4375, "loss": 2.6599981784820557 }, { "id": "identity_269131", "image": "IconQA/images/60109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of shapes that are circles among the total shapes? \n(A) 10/12\n(B) 2/10\n(C) 6/10\n(D) 6/7" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.953125, "loss": 3.3050122261047363 }, { "id": "identity_156879", "image": "TQA/images/question_images/aquifers_6517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the treated water pumped in from supply works?" }, { "from": "gpt", "value": "The answer is For storage and supply during high demand" } ], "clip_score": 28.25, "loss": 4.004055976867676 }, { "id": "identity_5813", "image": "TextVQA/images/ffcc4ef444b3c0ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of this store?" }, { "from": "gpt", "value": "The answer is lawson" } ], "clip_score": 35.09375, "loss": 3.055478096008301 }, { "id": "identity_260831", "image": "FigureQA/images/16315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the pie chart represented by Forest Green, and how does it compare to the percentage represented by Orange?" }, { "from": "gpt", "value": "The answer is Forest Green represents approximately 75% of the pie chart" } ], "clip_score": 43.6875, "loss": 2.2045834064483643 }, { "id": "identity_146537", "image": "DocVQA/images/hpxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned on the memorandum?" }, { "from": "gpt", "value": "The answer is November 3, 1978" } ], "clip_score": 28.09375, "loss": 1.8153624534606934 }, { "id": "identity_137099", "image": "PlotQA/images/115315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage of trained teachers between Pacific island small states and Liberia in 2005?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 30.828125, "loss": 1.962019920349121 }, { "id": "identity_268269", "image": "DVQA/images/bar_train_00005918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In at least one category, how many objects are favored by more than 9 people?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 27.90625, "loss": 3.1272873878479004 }, { "id": "identity_19926", "image": "IconQA/images/39203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use paper clips to measure the line. The line is about (_) paper clips long." }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.15625, "loss": 2.4911022186279297 }, { "id": "identity_221071", "image": "Super-CLEVR/images/superCLEVR_new_000170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest object in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.734375, "loss": 3.2601113319396973 }, { "id": "identity_291361", "image": "PMC-VQA/images/PMC6737727_F3_36291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How would you describe the signal in the image that resembles a slab?\nChoices:\n(A) High-signal \n(B) Low-signal \n(C) Irregular \n(D) Circular" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.859375, "loss": 4.678149223327637 }, { "id": "identity_242565", "image": "VQA2.0/images/304134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding bikes and how many people are walking?" }, { "from": "gpt", "value": "The answer is 2 people riding bikes, 2 people walking" } ], "clip_score": 28.28125, "loss": 1.4891353845596313 }, { "id": "identity_160444", "image": "UniGeo/images/2166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that M and N are the midpoints of AD and BC respectively in quadrilateral ABCD, and AB = 4, DC = 2, what is the ratio of the area of triangle AMN to the area of quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is The ratio of the area of triangle AMN to quadrilateral ABCD is 5/12" } ], "clip_score": 32.6875, "loss": 1.265130877494812 }, { "id": "identity_151743", "image": "ScienceQA/images/6357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the hole in the handsaw's handle?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 29.890625, "loss": 3.642728090286255 }, { "id": "identity_164721", "image": "Geometry3K/images/1001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment YG if YZ = 21 and XZ = 39?" }, { "from": "gpt", "value": "The answer is 32.86" } ], "clip_score": 28.796875, "loss": 2.222871780395508 }, { "id": "identity_104920", "image": "A-OKVQA/images/365217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the closest player's shorts?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.71875, "loss": 4.3269124031066895 }, { "id": "identity_154578", "image": "ScienceQA/images/285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phenotype ratio for the offspring if B represents black body color and b represents white body color?" }, { "from": "gpt", "value": "The answer is 3:1" } ], "clip_score": 31.234375, "loss": 1.7133517265319824 }, { "id": "identity_6450", "image": "FigureQA/images/62370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Brown have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.953125, "loss": 3.882183790206909 }, { "id": "identity_47627", "image": "ChartQA/images/07712434000343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Upper secondary and Lower secondary schools with access to computers for pedagogical purposes?" }, { "from": "gpt", "value": "The answer is 11.75%" } ], "clip_score": 35.5, "loss": 1.9428178071975708 }, { "id": "identity_14815", "image": "IconQA/images/106930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is eight (_)." }, { "from": "gpt", "value": "The answer is o'clock" } ], "clip_score": 30.859375, "loss": 3.103975534439087 }, { "id": "identity_175227", "image": "TabMWP/images/36098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of watermelons harvested?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 32.28125, "loss": 2.343485116958618 }, { "id": "identity_130761", "image": "A-OKVQA/images/256983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cap worn by the person in the foreground?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.078125, "loss": 3.629302978515625 }, { "id": "identity_233227", "image": "VizWiz/images/VizWiz_train_00016677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blades does the white fan have, and what is the shape of the object on the floor in front of the fan?" }, { "from": "gpt", "value": "The answer is Four blades, cross-like pattern" } ], "clip_score": 32.5625, "loss": 3.498891592025757 }, { "id": "identity_52046", "image": "DVQA/images/bar_train_00182810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does 'com' have a higher value for 'deal' than 'spirit'?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.609375, "loss": 3.1726012229919434 }, { "id": "identity_314737", "image": "VQA-AS/images/6669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person on the skateboard falling off of it?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 29.15625, "loss": 3.1003665924072266 }, { "id": "identity_111897", "image": "A-OKVQA/images/99952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the chairs?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 20.390625, "loss": 5.108438968658447 }, { "id": "identity_231954", "image": "A-OKVQA/images/308366.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are visible in the image, including the color of the plastic bowl containing an orange fruit?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.265625, "loss": 2.692887544631958 }, { "id": "identity_306098", "image": "AI2D/images/1101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the vein begin on a leaf?\nChoices:\n(A) impossible to answer\n(B) the roots\n(C) at the stem\n(D) on the bark" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.703125, "loss": 3.4851715564727783 }, { "id": "identity_98567", "image": "IconQA/images/67381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many diamonds are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.28125, "loss": 2.3024232387542725 }, { "id": "identity_170704", "image": "IconQA/images/73849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all shapes identical in size and shape?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.953125, "loss": 3.0137267112731934 }, { "id": "identity_208715", "image": "VQA2.0/images/401608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.71875, "loss": 2.395087957382202 }, { "id": "identity_86428", "image": "UniGeo/images/3070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠B = 40° and ∠CAE = 60°, what is the measure of ∠EAC?" }, { "from": "gpt", "value": "The answer is 20°" } ], "clip_score": 31.546875, "loss": 1.7319645881652832 }, { "id": "identity_321932", "image": "DocVQA/images/jlxw0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is Task Force F scheduled?" }, { "from": "gpt", "value": "The answer is 3:30" } ], "clip_score": 24.46875, "loss": 1.9233754873275757 }, { "id": "identity_297398", "image": "IconQA/images/58111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the nail in inches when measured to the nearest inch using the ruler?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 35.5625, "loss": 2.7602624893188477 }, { "id": "identity_137308", "image": "DVQA/images/bar_train_00050386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value for the 'fog' label across all categories?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 25.453125, "loss": 2.4905853271484375 }, { "id": "identity_163254", "image": "GeoQA+/images/2301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, if DE is parallel to AC and DF is parallel to AB, what is the measure of angle DFE?" }, { "from": "gpt", "value": "The answer is angle DFE = angle ABC = angle ACB degrees" } ], "clip_score": 32.0625, "loss": 2.8183932304382324 }, { "id": "identity_24960", "image": "FigureQA/images/65151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Medium Mint have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.3125, "loss": 4.4722981452941895 }, { "id": "identity_201706", "image": "PMC-VQA/images/PMC3299701_pone-0032796-g004_129436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What combination of imaging techniques is labeled at the top left of the image?" }, { "from": "gpt", "value": "The answer is GEFC + T1W" } ], "clip_score": 26.6875, "loss": 1.9229282140731812 }, { "id": "identity_332947", "image": "IconQA/images/44519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Ann is on the train one evening. Her watch displays the time. What is the time?\nChoices:\n(A) 8:00 P.M.\n(B) 8:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.90625, "loss": 3.7903544902801514 }, { "id": "identity_310652", "image": "TabMWP/images/14178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some individuals compared the quantities of tractors they possess. What is the average of these quantities?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 33.625, "loss": 2.9048335552215576 }, { "id": "identity_128562", "image": "VQA2.0/images/109450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.96875, "loss": 4.434364318847656 }, { "id": "identity_60294", "image": "FigureQA/images/50466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Dark Green section represent more than 50% of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 36.28125, "loss": 3.18030047416687 }, { "id": "identity_264213", "image": "VizWiz/images/VizWiz_train_00006743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what is written on this label? Is it indicating turmeric?" }, { "from": "gpt", "value": "The answer is turmrc" } ], "clip_score": 34.78125, "loss": 3.054513931274414 }, { "id": "identity_310200", "image": "GeoQA+/images/2758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O是一个三角形的外接圆,∠A=80°,则∠BOC等于()\nChoices:\n(A) 50°\n(B) 40°\n(C) 100°\n(D) 160°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.09375, "loss": 2.309215545654297 }, { "id": "identity_273471", "image": "VQA-AS/images/1934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color is the chair located on the right side?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 23.0625, "loss": 4.3799262046813965 }, { "id": "identity_7811", "image": "DocVQA/images/pkbg0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: WHEN WAS JOHNATHAN EVAN BORN?" }, { "from": "gpt", "value": "The answer is Friday 8/7/98" } ], "clip_score": 19.71875, "loss": 1.7634738683700562 }, { "id": "identity_59193", "image": "FigureQA/images/19558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Periwinkle section less than 40% of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.8125, "loss": 3.0860435962677 }, { "id": "identity_233659", "image": "ScienceQA/images/15517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these two objects have in common, and how does this property affect their interaction with light?" }, { "from": "gpt", "value": "The answer is Both objects scatter light due to their bumpy surfaces" } ], "clip_score": 23.5, "loss": 3.8286235332489014 }, { "id": "identity_83987", "image": "UniGeo/images/3472.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AE=3 and CF=1, what is the length of line l that passes through the square ABCD?" }, { "from": "gpt", "value": "The answer is √10" } ], "clip_score": 34.6875, "loss": 2.3771591186523438 }, { "id": "identity_191032", "image": "A-OKVQA/images/71563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the pen holder on the desk?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.78125, "loss": 3.1637234687805176 }, { "id": "identity_113847", "image": "TextVQA/images/63977bd38ebed04b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different models of LG phones are displayed in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 26.703125, "loss": 4.283143043518066 }, { "id": "identity_330653", "image": "VQA-AS/images/7829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 18.296875, "loss": 3.0222067832946777 }, { "id": "identity_283946", "image": "TextVQA/images/42b87a51a6913d93.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is currently available or accessible?" }, { "from": "gpt", "value": "The answer is business" } ], "clip_score": 12.578125, "loss": 5.072576999664307 }, { "id": "identity_70194", "image": "TQA/images/question_images/human_system_circulatory_1380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the large vein that runs vertically down the middle of the body in the image?" }, { "from": "gpt", "value": "The answer is Inferior vena cava" } ], "clip_score": 34.15625, "loss": 1.4942748546600342 }, { "id": "identity_50423", "image": "FigureQA/images/86307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by Gold?" }, { "from": "gpt", "value": "The answer is Approximately 80" } ], "clip_score": 19.46875, "loss": 2.6163270473480225 }, { "id": "identity_323949", "image": "PMC-VQA/images/PMC4203932_Fig1_329373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is not suitable for individuals with a pacemaker implant?\nChoices:\n(A) CT scans \n(B) X-rays \n(C) MR images \n(D) Ultrasound images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.15625, "loss": 3.6666972637176514 }, { "id": "identity_75777", "image": "AI2D/images/2137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which stage in the lifecycle of a star is represented by the label \"Red Supergiant\"?" }, { "from": "gpt", "value": "The answer is The stage before Supernova" } ], "clip_score": 38.65625, "loss": 2.8732354640960693 }, { "id": "identity_73420", "image": "AI2D/images/1449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phenomenon occurs on the 23rd of September according to the diagram?" }, { "from": "gpt", "value": "The answer is Equinox" } ], "clip_score": 35.71875, "loss": 2.83233380317688 }, { "id": "identity_220463", "image": "PMC-VQA/images/PMC3239364_F1_119442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique was likely used for this histological image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin staining" } ], "clip_score": 31.578125, "loss": 1.2760028839111328 }, { "id": "identity_191576", "image": "PMC-VQA/images/PMC7567400_f2_276956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter is indicated on the bottom-left corner of the image?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.734375, "loss": 3.450030565261841 }, { "id": "identity_313015", "image": "FigureQA/images/36771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.703125, "loss": 4.227626800537109 }, { "id": "identity_178819", "image": "TabMWP/images/33762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of miles biked per day by Joel over the past 7 days?" }, { "from": "gpt", "value": "The answer is 6.29 miles" } ], "clip_score": 35.0625, "loss": 2.0527119636535645 }, { "id": "identity_272721", "image": "PMC-VQA/images/PMC6077610_fig1_350507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of imaging is displayed in the top row of images? \nChoices:\n(A) Sagittal T1 \n(B) Coronal T1 \n(C) Sagittal FLAIR \n(D) Coronal FLAIR" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.171875, "loss": 3.8840689659118652 }, { "id": "identity_331945", "image": "MapQA/images/map_8392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have a higher value than that place?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 13.6484375, "loss": 2.7214226722717285 }, { "id": "identity_145762", "image": "DVQA/images/bar_train_00199448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the object 'cry' across all categories?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 29.4375, "loss": 2.1707160472869873 }, { "id": "identity_13704", "image": "Super-CLEVR/images/superCLEVR_new_000770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape of the tiny metal object right of the matte car the same as the small matte object that is on the right side of the green thing?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.71875, "loss": 5.35245418548584 }, { "id": "identity_274064", "image": "DocVQA/images/nxbx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the intended recipient of this memorandum?" }, { "from": "gpt", "value": "The answer is MR. PERRY GUSIC" } ], "clip_score": 28.3125, "loss": 4.04368782043457 }, { "id": "identity_20916", "image": "MapQA/images/map_4031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 16.28125, "loss": 3.0710558891296387 }, { "id": "identity_112556", "image": "TextVQA/images/0639ed62fbbbb695.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the advertisement contain any floral elements in its design?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.125, "loss": 3.257187604904175 }, { "id": "identity_151964", "image": "ScienceQA/images/8407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary method of movement for the animal in the image?" }, { "from": "gpt", "value": "The answer is Flight" } ], "clip_score": 24.34375, "loss": 3.3074800968170166 }, { "id": "identity_69700", "image": "ScienceQA/images/15144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what is the direction of the air flow from the object?" }, { "from": "gpt", "value": "The answer is Outwards from the nozzle" } ], "clip_score": 22.921875, "loss": 2.8715314865112305 }, { "id": "identity_255191", "image": "IconQA/images/77907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertices are there in total for all the shapes combined?" }, { "from": "gpt", "value": "The answer is 36" } ], "clip_score": 27.96875, "loss": 1.9139043092727661 }, { "id": "identity_159435", "image": "GeoQA+/images/4069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the sector AOB is rolled to form a cone, what is the slant height of the cone?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 29.84375, "loss": 2.0945162773132324 }, { "id": "identity_35811", "image": "PMC-VQA/images/PMC3540650_fig1_177766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is affected by diffuse swelling in the image? \nChoices:\n(A) Left superior oblique muscle \n(B) Right inferior oblique muscle \n(C) Left inferior oblique muscle \n(D) Right superior rectus muscle " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.140625, "loss": 3.813422441482544 }, { "id": "identity_290031", "image": "TextVQA/images/f441f29812b385ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the number of someone wearing a white jersey?" }, { "from": "gpt", "value": "The answer is 96" } ], "clip_score": 22.34375, "loss": 2.95499587059021 }, { "id": "identity_291195", "image": "DocVQA/images/fjkm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the quote written on the specified page?" }, { "from": "gpt", "value": "The answer is \"Building a stronger industry and a better market\"" } ], "clip_score": 32.0, "loss": 1.1526249647140503 }, { "id": "identity_298918", "image": "TextVQA/images/61e78e1529ef6d0e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is in red?" }, { "from": "gpt", "value": "The answer is gooin" } ], "clip_score": 13.65625, "loss": 5.215181350708008 }, { "id": "identity_273892", "image": "GeoQA+/images/3611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, line a is parallel to line b. A right-angled triangle with a 60° angle (∠ABC = 60°) is placed as shown in the diagram, with points A and C lying on lines a and b respectively. If ∠1 = 15°, what is the degree measure of ∠2?\nChoices:\n(A) 30°\n(B) 60°\n(C) 45°\n(D) 75°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.625, "loss": 2.7258975505828857 }, { "id": "identity_290352", "image": "TQA/images/question_images/human_system_digestive_6072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What organ is responsible for absorption among the following choices: (A) mouth, (B) rectum, (C) stomach, or (D) small intestine?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.171875, "loss": 3.7095935344696045 }, { "id": "identity_119229", "image": "VQA2.0/images/55575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the largest animal displayed in the center of the image?" }, { "from": "gpt", "value": "The answer is Elephant" } ], "clip_score": 20.3125, "loss": 2.2716541290283203 }, { "id": "identity_17160", "image": "DocVQA/images/ggxn0226_78.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total Post-op?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 20.390625, "loss": 2.4548001289367676 }, { "id": "identity_199033", "image": "PMC-VQA/images/PMC6020456_fig2_333758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of a fracture in the mandible on the CT image?" }, { "from": "gpt", "value": "The answer is No, there is no visible fracture in the mandible" } ], "clip_score": 32.0, "loss": 1.5322593450546265 }, { "id": "identity_162486", "image": "GeoQA+/images/603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AF bisects angle BAD and BE bisects angle ABC, and AF = 6 and BE = 8, what is the measure of angle AEB?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 32.5, "loss": 2.4704508781433105 }, { "id": "identity_144145", "image": "MapQA/images/map_7569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of manufacturers between 24.8% and 28.9%?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 36.90625, "loss": 2.3577499389648438 }, { "id": "identity_204102", "image": "PMC-VQA/images/PMC5115862_pone.0167063.g010_96948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the different colors used in the image?" }, { "from": "gpt", "value": "The answer is They represent different tagged molecules or structures" } ], "clip_score": 21.84375, "loss": 3.7714755535125732 }, { "id": "identity_40166", "image": "FigureQA/images/13186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.34375, "loss": 2.4341251850128174 }, { "id": "identity_276985", "image": "A-OKVQA/images/317851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the car's hood being open?\nChoices:\n(A) for repairs\n(B) capture rain\n(C) trap birds\n(D) loading luggage" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.171875, "loss": 4.4072113037109375 }, { "id": "identity_108393", "image": "Super-CLEVR/images/superCLEVR_new_000462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the body of the large bus on the right side of the image?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 27.265625, "loss": 2.9812419414520264 }, { "id": "identity_242115", "image": "GeoQA+/images/2398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,将直角三角板60°角的顶点放在圆心O上,斜边和一直角边分别与⊙O相交于A、B两点,P是优弧AB上任意一点(与A、B不重合),若∠APB的度数为30°,求∠AOB的度数。" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 26.828125, "loss": 1.529317021369934 }, { "id": "identity_335550", "image": "PMC-VQA/images/PMC4979837_F2_61537.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which section of the spine is shown in the sagittal image (A)?\nChoices:\n(A) C1-T2 \n(B) L1-L5 \n(C) T12-L5 \n(D) C1-C7" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.765625, "loss": 4.221121788024902 }, { "id": "identity_330614", "image": "TabMWP/images/9406.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Mandy kept a log of how many minutes she spent practicing horseback riding over the past 4 days. On which day did she practice the least?\nChoices:\n(A) Thursday\n(B) Friday\n(C) Saturday\n(D) Sunday" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.3125, "loss": 3.69758939743042 }, { "id": "identity_72610", "image": "AI2D/images/1175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What cellular process is associated with the structure labeled \"Golgi apparatus\"?" }, { "from": "gpt", "value": "The answer is Modifying, sorting, and packaging proteins and lipids" } ], "clip_score": 26.578125, "loss": 1.5374553203582764 }, { "id": "identity_32658", "image": "DocVQA/images/fyww0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the due date(outline)?" }, { "from": "gpt", "value": "The answer is September 15, 2000" } ], "clip_score": 21.6875, "loss": 1.9533380270004272 }, { "id": "identity_142198", "image": "DocVQA/images/qngh0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of approval by the Quality Control Director?" }, { "from": "gpt", "value": "The answer is 4/24/01" } ], "clip_score": 24.40625, "loss": 2.121805429458618 }, { "id": "identity_29285", "image": "MapQA/images/map_5487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Ohio have the highest value in the USA ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.875, "loss": 2.520597457885742 }, { "id": "identity_108481", "image": "VQA2.0/images/71128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the animal doing?" }, { "from": "gpt", "value": "The answer is Jumping or leaping" } ], "clip_score": 16.65625, "loss": 2.691091299057007 }, { "id": "identity_173265", "image": "IconQA/images/2179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select a marble without looking, what is the probability of picking a marble from the bottom row?" }, { "from": "gpt", "value": "The answer is 1/3 or 0.33" } ], "clip_score": 24.6875, "loss": 1.3078629970550537 }, { "id": "identity_292350", "image": "DocVQA/images/yllg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the authorization number?" }, { "from": "gpt", "value": "The answer is 539" } ], "clip_score": 17.53125, "loss": 2.911212921142578 }, { "id": "identity_48916", "image": "DocVQA/images/hjxn0226_29.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of cases for ANCA-FT CIU Wright Cremascoli in the period 2010-2013?" }, { "from": "gpt", "value": "The answer is 2.6%" } ], "clip_score": 21.390625, "loss": 1.8890241384506226 }, { "id": "identity_337041", "image": "PlotQA/images/8538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it the case that in every year, the combined net incurrence of foreign liabilities in Samoa and another country is greater than the net incurrence of foreign liabilities in a third country?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 36.03125, "loss": 2.8497273921966553 }, { "id": "identity_29937", "image": "DocVQA/images/ssbw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: when was the Project started?" }, { "from": "gpt", "value": "The answer is monday, february 11, 2002" } ], "clip_score": 18.6875, "loss": 1.3927725553512573 }, { "id": "identity_206612", "image": "TextVQA/images/33723febf95151ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the flooring outside the court area?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.734375, "loss": 3.6175875663757324 }, { "id": "identity_43927", "image": "FigureQA/images/82791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Medium Seafoam always increasing?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.640625, "loss": 3.333230495452881 }, { "id": "identity_248265", "image": "VQA2.0/images/18839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of equipment are being used in the pictures, and what are they?" }, { "from": "gpt", "value": "The answer is three: snowboard, skis, ski poles" } ], "clip_score": 27.15625, "loss": 2.103386640548706 }, { "id": "identity_285304", "image": "TextVQA/images/99371dbae815a74b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which product is being promoted in the advertisement?" }, { "from": "gpt", "value": "The answer is no text in image" } ], "clip_score": 20.875, "loss": 4.459840297698975 }, { "id": "identity_324595", "image": "VQA2.0/images/145538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this truck?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 24.734375, "loss": 3.293780565261841 }, { "id": "identity_89967", "image": "GeoQA+/images/5249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of diagonal AC of square ABCD?" }, { "from": "gpt", "value": "The answer is √10" } ], "clip_score": 30.328125, "loss": 2.1575396060943604 }, { "id": "identity_338243", "image": "ScienceQA/images/10779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Consider the magnetic force between the magnets in each pair. Which of the following statements is accurate?\nChoices:\n(A) The magnitude of the magnetic force is smaller in one pair.\n(B) The magnitude of the magnetic force is the same in both pairs.\n(C) The magnitude of the magnetic force is smaller in the other pair." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.703125, "loss": 3.791785478591919 }, { "id": "identity_57368", "image": "FigureQA/images/82592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Forest Green at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 33.46875, "loss": 2.5197067260742188 }, { "id": "identity_126236", "image": "Super-CLEVR/images/superCLEVR_new_000337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that is the same color as the largest airplane?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.46875, "loss": 2.9756548404693604 }, { "id": "identity_171693", "image": "IconQA/images/9864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows of dots are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.9375, "loss": 2.3368093967437744 }, { "id": "identity_299425", "image": "ScienceQA/images/19668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force between them?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.609375, "loss": 3.8430328369140625 }, { "id": "identity_21556", "image": "ScienceQA/images/11541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Saint Vincent and the Grenadines\n(B) Barbados\n(C) Saint Lucia\n(D) Cuba" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.046875, "loss": 4.344629764556885 }, { "id": "identity_247402", "image": "PMC-VQA/images/PMC3178647_pone-0025316-g002_109634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key structural components labeled in the image, and how do they indicate that the P. tricornutum is in the process of dividing?" }, { "from": "gpt", "value": "The answer is Mitochondria, Plastid, Nucleus indicate dividing P. tricornutum" } ], "clip_score": 31.96875, "loss": 1.9053716659545898 }, { "id": "identity_47395", "image": "PlotQA/images/93112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of years displayed on the X-axis?" }, { "from": "gpt", "value": "The answer is 2015 to 2018" } ], "clip_score": 19.796875, "loss": 1.7729668617248535 }, { "id": "identity_193534", "image": "A-OKVQA/images/30973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are visible on the bed?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.5, "loss": 4.726179599761963 }, { "id": "identity_172199", "image": "IconQA/images/19310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangle vertices present in the image?" }, { "from": "gpt", "value": "The answer is 27" } ], "clip_score": 26.6875, "loss": 2.206023693084717 }, { "id": "identity_174267", "image": "IconQA/images/26216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of squares in the top two rows?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.578125, "loss": 2.4790287017822266 }, { "id": "identity_32306", "image": "TabMWP/images/15884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Quincy kept track of the number of games his soccer team won each year. According to the table, what was the rate of change between 2016 and 2017?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 41.34375, "loss": 2.971902847290039 }, { "id": "identity_248368", "image": "PMC-VQA/images/PMC8593536_F1_113266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential complications that can arise from the injury causing the wide bony space in the image, and how might these complications be addressed medically?" }, { "from": "gpt", "value": "The answer is Infection, nerve damage, misalignment, bone healing issues; antibiotics, surgery, physical therapy, monitoring" } ], "clip_score": 26.40625, "loss": 2.3189759254455566 }, { "id": "identity_249818", "image": "UniGeo/images/2086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在▱ABCD中,对角线AC与BD相交于点O,E是边CD的中点,连结OE.若∠ABC=50°,∠BAC=80°,求∠DOE的度数。" }, { "from": "gpt", "value": "The answer is 25°" } ], "clip_score": 25.84375, "loss": 2.0237138271331787 }, { "id": "identity_133304", "image": "VizWiz/images/VizWiz_train_00000973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object in the image?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 22.609375, "loss": 3.5393035411834717 }, { "id": "identity_164920", "image": "UniGeo/images/1266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AB is the diameter of circle O and AC is a tangent to circle O at point A, what is the length of segment BD if the radius of the circle is r?" }, { "from": "gpt", "value": "The answer is r" } ], "clip_score": 34.0625, "loss": 3.172924518585205 }, { "id": "identity_186099", "image": "A-OKVQA/images/337126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus in the center of the image?" }, { "from": "gpt", "value": "The answer is White or light grey" } ], "clip_score": 24.34375, "loss": 4.05500602722168 }, { "id": "identity_58563", "image": "DocVQA/images/pmyc0227_136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between ICNND \"Acceptable\" Level and NRC Standard for Vitamin A?" }, { "from": "gpt", "value": "The answer is 34.91%" } ], "clip_score": 25.46875, "loss": 2.060091018676758 }, { "id": "identity_287243", "image": "AI2D/images/721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who among the following is Jasper?\nChoices:\n(A) b\n(B) e\n(C) a\n(D) c" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.1953125, "loss": 3.7026290893554688 }, { "id": "identity_304965", "image": "DocVQA/images/rlvm0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Project Manager according to this?" }, { "from": "gpt", "value": "The answer is Frank Beal" } ], "clip_score": 16.984375, "loss": 3.733593702316284 }, { "id": "identity_201193", "image": "A-OKVQA/images/162753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing striped uniforms?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.25, "loss": 4.7416229248046875 }, { "id": "identity_201157", "image": "A-OKVQA/images/231682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific model of motorcycle is the officer riding?" }, { "from": "gpt", "value": "The answer is Harley-Davidson police model" } ], "clip_score": 27.4375, "loss": 2.279029130935669 }, { "id": "identity_132106", "image": "Super-CLEVR/images/superCLEVR_new_001515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a vehicle in the image that is the same color as the bus?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.8125, "loss": 3.3532309532165527 }, { "id": "identity_194775", "image": "A-OKVQA/images/80470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the scarf worn by the person on the right?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 11.859375, "loss": 3.384321928024292 }, { "id": "identity_79027", "image": "GeoQA+/images/441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ABCD is a parallelogram with vertices A, B, and D on circle O, and vertex C on the diameter BE of circle O, what is the length of the diameter BE?" }, { "from": "gpt", "value": "The answer is 2 times the radius of circle O" } ], "clip_score": 35.09375, "loss": 1.8916807174682617 }, { "id": "identity_194380", "image": "A-OKVQA/images/340998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pedestrians in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.296875, "loss": 3.215862989425659 }, { "id": "identity_262363", "image": "MapQA/images/map_7746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which value is the lowest in the USA?" }, { "from": "gpt", "value": "The answer is 21.7%-38.8%" } ], "clip_score": 28.203125, "loss": 2.348837375640869 }, { "id": "identity_289088", "image": "DocVQA/images/fkbw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On what date was the article published?" }, { "from": "gpt", "value": "The answer is march 26, 1977" } ], "clip_score": 23.390625, "loss": 1.5141851902008057 }, { "id": "identity_317531", "image": "DocVQA/images/hqlb0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned in the bottom right?" }, { "from": "gpt", "value": "The answer is 5/22/70" } ], "clip_score": 22.6875, "loss": 2.169315814971924 }, { "id": "identity_42684", "image": "DVQA/images/bar_train_00012190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the bar labeled \"fool\"?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.8125, "loss": 2.4901139736175537 }, { "id": "identity_40347", "image": "FigureQA/images/15780.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Bubblegum section larger than the Orchid section?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.5, "loss": 3.3185036182403564 }, { "id": "identity_272200", "image": "TQA/images/abc_question_images/parts_fish_12911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the pelvic fins:\n(A) W\n(B) N\n(C) P\n(D) T" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.767434597015381 }, { "id": "identity_321286", "image": "DocVQA/images/ymwn0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the \"complete Reports (session leaders)\" on this day?" }, { "from": "gpt", "value": "The answer is 5:15 PM - 6:15 PM" } ], "clip_score": 21.6875, "loss": 1.3678237199783325 }, { "id": "identity_243743", "image": "TabMWP/images/22847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pull-ups performed by all participants in the competition?" }, { "from": "gpt", "value": "The answer is 933" } ], "clip_score": 23.984375, "loss": 2.515378713607788 }, { "id": "identity_78022", "image": "TQA/images/abc_question_images/nuclear_energy_17093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What label represents the body of water?" }, { "from": "gpt", "value": "The answer is T" } ], "clip_score": 20.453125, "loss": 2.5982956886291504 }, { "id": "identity_162693", "image": "UniGeo/images/2521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,∠CDB=40°,则∠ACB的度数是多少?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 29.3125, "loss": 1.6310956478118896 }, { "id": "identity_294861", "image": "PMC-VQA/images/PMC6684521_F1_20545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image shown? \nOptions:\n(A) Microtubules in RNFF1 cell line,\n(B) Dysfunctional telomeres in MR39 cell line,\n(C) Signal intensities of individual chromosomes,\n(D) Spread of cells on 9th passage." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.46875, "loss": 4.922292232513428 }, { "id": "identity_336560", "image": "PMC-VQA/images/PMC4100749_F1_306570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the lesion present in this area?\nChoices:\n(A) Hypodense \n(B) Hematoma \n(C) Hyperdense \n(D) Hyperintense" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.296875, "loss": 4.46014404296875 }, { "id": "identity_271536", "image": "IconQA/images/25671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of dots?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 25.0625, "loss": 2.0464770793914795 }, { "id": "identity_261491", "image": "VQA-AS/images/1845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the dog currently engaged in?" }, { "from": "gpt", "value": "The answer is jumping" } ], "clip_score": 22.21875, "loss": 2.5396180152893066 }, { "id": "identity_77477", "image": "TQA/images/question_images/photosynthesis_1266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the plant is responsible for absorbing water from the soil?" }, { "from": "gpt", "value": "The answer is Roots" } ], "clip_score": 29.453125, "loss": 2.868210554122925 }, { "id": "identity_308631", "image": "TextVQA/images/04750d0d531a2b89.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of league is this?" }, { "from": "gpt", "value": "The answer is football" } ], "clip_score": 22.953125, "loss": 3.798006057739258 }, { "id": "identity_228100", "image": "ScienceQA/images/10150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted ocean and list all the continents that border it" }, { "from": "gpt", "value": "The answer is Antarctica, South America, Africa, Australia" } ], "clip_score": 33.875, "loss": 2.084576368331909 }, { "id": "identity_229007", "image": "UniGeo/images/53.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, 圆锥模具的母线长为10, 底面半径为5, 该圆锥模具的体积是多少?" }, { "from": "gpt", "value": "The answer is \\(\\frac{125\\sqrt{3}}{3} \\pi\\)" } ], "clip_score": 20.890625, "loss": 2.624877452850342 }, { "id": "identity_195456", "image": "PMC-VQA/images/PMC4245066_F3_339365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main anatomical structure visible in the center of the image?" }, { "from": "gpt", "value": "The answer is Circle of Willis" } ], "clip_score": 32.09375, "loss": 2.6925151348114014 }, { "id": "identity_335072", "image": "TabMWP/images/18783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy 5 gray sweatshirts?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 35.46875, "loss": 2.150325298309326 }, { "id": "identity_18187", "image": "ScienceQA/images/16803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which state is highlighted?\nChoices:\n(A) New Hampshire\n(B) Pennsylvania\n(C) Vermont\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.6875, "loss": 3.9968504905700684 }, { "id": "identity_312431", "image": "ChartQA/images/multi_col_20008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest Q1 average daily rate and the lowest Q4 average daily rate in hotels in Toronto from 2012 to 2017?" }, { "from": "gpt", "value": "The answer is 41" } ], "clip_score": 24.203125, "loss": 2.842869281768799 }, { "id": "identity_17171", "image": "DVQA/images/bar_train_00183982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart presented in a percentage scale?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 26.015625, "loss": 3.5162155628204346 }, { "id": "identity_259980", "image": "A-OKVQA/images/365206.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visible indicators in the image that suggest only foot traffic is allowed on this street at this time, and how do they collectively support this conclusion?" }, { "from": "gpt", "value": "The answer is Pedestrian elements, absence of vehicles, barriers, and layout" } ], "clip_score": 19.09375, "loss": 4.259455680847168 }, { "id": "identity_161462", "image": "Geometry3K/images/786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is a chord of circle O with a radius of 10 and m∠AOB is 60°, what is the length of segment AX, where X is the midpoint of chord AB?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 39.21875, "loss": 2.103019952774048 }, { "id": "identity_90818", "image": "Geometry3K/images/1464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the line segment that is tangent to the circle?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 34.84375, "loss": 1.6694444417953491 }, { "id": "identity_130297", "image": "TextVQA/images/833011003958eb5e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the airline advertised on the clothing and backdrop?" }, { "from": "gpt", "value": "The answer is Volaris" } ], "clip_score": 24.953125, "loss": 2.7771332263946533 }, { "id": "identity_18821", "image": "A-OKVQA/images/138871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is sitting on the desk on the right is seen very little since the computer age?\nChoices:\n(A) typewriter\n(B) mouse pad\n(C) mouse\n(D) corded phone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.703125, "loss": 4.347397804260254 }, { "id": "identity_288512", "image": "PMC-VQA/images/PMC4830041_Fig1_20947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the large tumor situated?\nChoices:\n(A) In the pelvic cavity.\n(B) In the left ventricle.\n(C) In the left atrium.\n(D) In the right atrium." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.625, "loss": 4.145890235900879 }, { "id": "identity_21897", "image": "TextVQA/images/e6b1ae1ea0e0c099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the catcher's number?" }, { "from": "gpt", "value": "The answer is no catch" } ], "clip_score": 23.671875, "loss": 4.932275295257568 }, { "id": "identity_337381", "image": "TextVQA/images/eef6025efea187ee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of this perfume?" }, { "from": "gpt", "value": "The answer is no 19" } ], "clip_score": 34.25, "loss": 2.8522982597351074 }, { "id": "identity_123593", "image": "VizWiz/images/VizWiz_train_00014962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much protein does this yogurt contain per 100g serving?" }, { "from": "gpt", "value": "The answer is 4g" } ], "clip_score": 35.25, "loss": 2.2356297969818115 }, { "id": "identity_176796", "image": "TabMWP/images/31044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in cost between the most expensive and least expensive item on the menu?" }, { "from": "gpt", "value": "The answer is $2.88" } ], "clip_score": 21.03125, "loss": 2.276740550994873 }, { "id": "identity_56105", "image": "DVQA/images/bar_train_00037600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the 'stores' store have consistent sales across all items?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.625, "loss": 3.038395643234253 }, { "id": "identity_75605", "image": "AI2D/images/3661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the blood vessels that carry deoxygenated blood to the lungs" }, { "from": "gpt", "value": "The answer is Pulmonary Artery" } ], "clip_score": 29.9375, "loss": 1.5778428316116333 }, { "id": "identity_255785", "image": "IconQA/images/5207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each flamingo eats 3 fish per day, how many fish do all the flamingos eat in 5 days?" }, { "from": "gpt", "value": "The answer is 45 fish" } ], "clip_score": 39.8125, "loss": 1.93384850025177 }, { "id": "identity_163195", "image": "GeoQA+/images/3270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若四边形ABCD是一个正方形,且对角线AC、BD的长都为20cm,则四边形EFGH的边长是多少厘米?" }, { "from": "gpt", "value": "The answer is 7.07厘米" } ], "clip_score": 23.671875, "loss": 2.222020149230957 }, { "id": "identity_147184", "image": "DocVQA/images/mhkm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which entity is the certificate issued?" }, { "from": "gpt", "value": "The answer is CITIZENS' COMMISSION ON SCIENCE, LAW, AND FOOD SUPPLY" } ], "clip_score": 28.859375, "loss": 1.833362102508545 }, { "id": "identity_100404", "image": "TabMWP/images/11187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the shop sell the most ice cream cones?" }, { "from": "gpt", "value": "The answer is Saturday" } ], "clip_score": 31.03125, "loss": 2.7885851860046387 }, { "id": "identity_271713", "image": "DVQA/images/bar_train_00165645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, counting from the bottom, is labeled as the third?" }, { "from": "gpt", "value": "The answer is track" } ], "clip_score": 21.46875, "loss": 3.9292914867401123 }, { "id": "identity_94234", "image": "TabMWP/images/3964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 2 highlighters are removed from each desk drawer, how many highlighters will be in 4 desk drawers?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 31.984375, "loss": 2.1845133304595947 }, { "id": "identity_259773", "image": "GeoQA+/images/5555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图:△ABC中,DE∥BC,已知AE=6,AD:DB=3:4,若BC=14,则AB的长为多少?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 30.53125, "loss": 2.556408166885376 }, { "id": "identity_79018", "image": "UniGeo/images/2223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a ball is rolled down the slope, what is the vertical distance it would have descended after traveling 65m along the slope?" }, { "from": "gpt", "value": "The answer is 25m" } ], "clip_score": 30.8125, "loss": 2.0638961791992188 }, { "id": "identity_336665", "image": "A-OKVQA/images/566931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the person in the water wants to copy what the others are doing, what does she need?\nChoices:\n(A) headband\n(B) surfboard\n(C) bracelet\n(D) anklet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.640625, "loss": 4.151957035064697 }, { "id": "identity_154903", "image": "ScienceQA/images/1547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors are used to represent the magnets in the image?" }, { "from": "gpt", "value": "The answer is Green and Purple" } ], "clip_score": 21.1875, "loss": 2.105947971343994 }, { "id": "identity_154501", "image": "ScienceQA/images/10479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the amphibian shown in a resting or active position?" }, { "from": "gpt", "value": "The answer is Resting" } ], "clip_score": 29.859375, "loss": 2.563161611557007 }, { "id": "identity_269052", "image": "VizWiz/images/VizWiz_train_00019509.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the title of this book?" }, { "from": "gpt", "value": "The answer is big print address book" } ], "clip_score": 26.6875, "loss": 3.1267333030700684 }, { "id": "identity_240340", "image": "ScienceQA/images/10518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Arrange the given states in order from north to south" }, { "from": "gpt", "value": "The answer is Kentucky, Arkansas, Alabama, Arizona" } ], "clip_score": 30.015625, "loss": 1.6063733100891113 }, { "id": "identity_115553", "image": "VQA2.0/images/231524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of street are the cyclists on?" }, { "from": "gpt", "value": "The answer is A multi-lane city street" } ], "clip_score": 28.28125, "loss": 2.7231898307800293 }, { "id": "identity_292281", "image": "PMC-VQA/images/PMC6082884_FIG1_351942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is observed in the right lung field?\nChoices:\n(A) Interstitial prominence\n(B) Nodular opacities\n(C) Dense peripheral pleural-based opacity\n(D) Consolidation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.21875, "loss": 4.062717437744141 }, { "id": "identity_143763", "image": "DocVQA/images/lghx0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the project title for which the check was issued?" }, { "from": "gpt", "value": "The answer is Influence of Nutritional Variables on Vitamin D Metabolism and Action in Dogs and Humans" } ], "clip_score": 19.71875, "loss": 2.475254535675049 }, { "id": "identity_246723", "image": "GeoQA+/images/5684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABCD是⊙O的内接四边形,BE是⊙O的直径,连接AE.若∠BCD=2∠BAD,且∠BAE的度数是∠DAE的2倍,则∠BAE的度数是多少?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 26.640625, "loss": 2.109928607940674 }, { "id": "identity_304791", "image": "DocVQA/images/fxwg0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in the header?" }, { "from": "gpt", "value": "The answer is R. J. REYNOLDS BRAND DISTRIBUTION AGREEMENT" } ], "clip_score": 22.59375, "loss": 1.1831026077270508 }, { "id": "identity_196592", "image": "VQA2.0/images/543747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.375, "loss": 2.333671808242798 }, { "id": "identity_128710", "image": "VQA2.0/images/106463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the police motorcycles moving or stationary?" }, { "from": "gpt", "value": "The answer is Moving" } ], "clip_score": 25.9375, "loss": 2.792548179626465 }, { "id": "identity_6531", "image": "VizWiz/images/VizWiz_train_00008960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code in the picture?" }, { "from": "gpt", "value": "The answer is duqint" } ], "clip_score": 23.125, "loss": 4.128978729248047 }, { "id": "identity_232988", "image": "UniGeo/images/1377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,BC是⊙O的直径,点D在⊙O上,AB是⊙O的切线,B为切点,连接CD并延长交AB于点A,若∠BOD=100°,求∠CAD的度数。" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 25.0, "loss": 1.8173891305923462 }, { "id": "identity_16426", "image": "FigureQA/images/4514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Brown less than Dark Turquoise?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.578125, "loss": 3.7470829486846924 }, { "id": "identity_34295", "image": "DVQA/images/bar_train_00040083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of profit in film?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 21.59375, "loss": 2.6928651332855225 }, { "id": "identity_214786", "image": "VizWiz/images/VizWiz_train_00003309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of jelly beans are visible in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.484375, "loss": 2.263150930404663 }, { "id": "identity_292050", "image": "CLEVR-Math/images/CLEVR_train_062954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 cylinders, how many total objects will there be?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 23.59375, "loss": 2.055318832397461 }, { "id": "identity_100185", "image": "TabMWP/images/13170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which zoo has the highest total cost for one child and one senior ticket combined?" }, { "from": "gpt", "value": "The answer is Atlanta Zoo" } ], "clip_score": 38.5625, "loss": 2.3808512687683105 }, { "id": "identity_236399", "image": "Super-CLEVR/images/superCLEVR_new_000097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the size of the brown metal tandem bike to the red scooter and determine which one is larger" }, { "from": "gpt", "value": "The answer is the brown metal tandem bike is larger" } ], "clip_score": 25.109375, "loss": 1.5524330139160156 }, { "id": "identity_304816", "image": "TabMWP/images/16962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A county agency recorded the money donated to several charitable causes over time. Which cause raised more money in May, this or education?\nChoices:\n(A) education\n(B) this" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.703125, "loss": 3.7984650135040283 }, { "id": "identity_192714", "image": "PMC-VQA/images/PMC4557590_f4-cln_70p654_420034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What lobe of the brain is labeled in the image?" }, { "from": "gpt", "value": "The answer is Superior Parietal Lobe" } ], "clip_score": 32.5, "loss": 1.9868106842041016 }, { "id": "identity_184249", "image": "PMC-VQA/images/PMC7244456_Fig1_176475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any other bones that may be affected in the image?" }, { "from": "gpt", "value": "The answer is No other bones affected" } ], "clip_score": 29.390625, "loss": 3.368483304977417 }, { "id": "identity_52255", "image": "DocVQA/images/nlcn0226_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the limit of perchlorate in water based on abnormal brain development in offspring, as supported by the data?" }, { "from": "gpt", "value": "The answer is 1 ppb" } ], "clip_score": 18.578125, "loss": 2.1880440711975098 }, { "id": "identity_149239", "image": "DocVQA/images/xtkg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections are listed on page 4?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.484375, "loss": 2.145867347717285 }, { "id": "identity_79814", "image": "GEOS/images/058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle AEC?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.515625, "loss": 2.4453506469726562 }, { "id": "identity_225777", "image": "DocVQA/images/ppnh0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address, including the city, state, and ZIP code, of W STORMA?" }, { "from": "gpt", "value": "The answer is 11527 Garfield Ave, Wauwatosa, Wis, 53226" } ], "clip_score": 15.2265625, "loss": 1.411103367805481 }, { "id": "identity_111107", "image": "PMC-VQA/images/PMC5412004_f0005_158662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the brain is predominantly visible in this image?" }, { "from": "gpt", "value": "The answer is Cerebral hemispheres" } ], "clip_score": 33.96875, "loss": 1.9084964990615845 }, { "id": "identity_296380", "image": "VQA2.0/images/386722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what time was this photograph taken?" }, { "from": "gpt", "value": "The answer is afternoon" } ], "clip_score": 18.953125, "loss": 3.7390034198760986 }, { "id": "identity_107150", "image": "Super-CLEVR/images/superCLEVR_new_001450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have exactly three wheels?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.765625, "loss": 2.4658915996551514 }, { "id": "identity_291771", "image": "PlotQA/images/59700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is represented by the 3rd bar from the top in the year 2010?" }, { "from": "gpt", "value": "The answer is Belize" } ], "clip_score": 20.875, "loss": 5.093133926391602 }, { "id": "identity_196053", "image": "A-OKVQA/images/390515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the vehicle parked directly behind the black vehicle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.984375, "loss": 3.542363405227661 }, { "id": "identity_26415", "image": "PMC-VQA/images/PMC5758251_F5_260883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to the dural sac in the flexed position according to the caption? \nChoices:\n(A) Compression by the posterior wall of the L4 VB \n(B) Expansion due to decreased spinal pressure \n(C) No change in position or size \n(D) Compression of the anterior wall of the L4 VB " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.828125, "loss": 3.5008544921875 }, { "id": "identity_128435", "image": "VizWiz/images/VizWiz_train_00014628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface is the speaker placed on in the image?" }, { "from": "gpt", "value": "The answer is The surface is not visible in the image" } ], "clip_score": 26.25, "loss": 2.8192977905273438 }, { "id": "identity_129451", "image": "A-OKVQA/images/521301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the carriage have?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.046875, "loss": 3.9598231315612793 }, { "id": "identity_112730", "image": "PMC-VQA/images/PMC7644296_FI190005-2_291992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical procedure being depicted in the image?" }, { "from": "gpt", "value": "The answer is The image depicts an angiography procedure" } ], "clip_score": 30.046875, "loss": 2.548034191131592 }, { "id": "identity_160966", "image": "GeoQA+/images/3757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, with BE and CF as altitudes and M as the midpoint of BC, what is the ratio of the area of triangle EFM to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 33.1875, "loss": 1.5799944400787354 }, { "id": "identity_245171", "image": "DocVQA/images/pmdv0228_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days in total did Dr. W. J. Darby spend on trips for Vanderbilt in July 1980?" }, { "from": "gpt", "value": "The answer is 5 days" } ], "clip_score": 27.171875, "loss": 2.1378138065338135 }, { "id": "identity_81267", "image": "UniGeo/images/1062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠B = 130° in the inscribed quadrilateral ABCD, what is the measure of ∠BAD?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 26.9375, "loss": 1.8251862525939941 }, { "id": "identity_134329", "image": "DVQA/images/bar_train_00019583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of 'stock' for both 'course' and 'symbol'?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 25.8125, "loss": 1.6491315364837646 }, { "id": "identity_125007", "image": "VQA-RAD/images/synpic24967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the horizontal structure located in the lower third of the image?" }, { "from": "gpt", "value": "The answer is Pancreas" } ], "clip_score": 25.828125, "loss": 3.079210042953491 }, { "id": "identity_87214", "image": "Geometry3K/images/84.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the slope of the line represented by the equation 2y - 11/5 = 12 - 3x?" }, { "from": "gpt", "value": "The answer is The slope is -3/2" } ], "clip_score": 30.953125, "loss": 2.788478374481201 }, { "id": "identity_247082", "image": "ScienceQA/images/1475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and name its capital city" }, { "from": "gpt", "value": "The answer is Santo Domingo" } ], "clip_score": 28.0, "loss": 3.123105764389038 }, { "id": "identity_114367", "image": "VizWiz/images/VizWiz_train_00009512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is on the fabric in the foreground?" }, { "from": "gpt", "value": "The answer is Striped and geometric pattern" } ], "clip_score": 24.078125, "loss": 3.3846213817596436 }, { "id": "identity_51312", "image": "MapQA/images/map_10507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of California?" }, { "from": "gpt", "value": "The answer is 76-78" } ], "clip_score": 25.453125, "loss": 2.3630619049072266 }, { "id": "identity_228379", "image": "FigureQA/images/89323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest average value on the y-axis?" }, { "from": "gpt", "value": "The answer is Green Yellow" } ], "clip_score": 28.65625, "loss": 2.41090989112854 }, { "id": "identity_100311", "image": "TabMWP/images/21022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who collected the least number of cans of food?" }, { "from": "gpt", "value": "The answer is Elena and Simon" } ], "clip_score": 35.84375, "loss": 2.75923490524292 }, { "id": "identity_126855", "image": "VQA2.0/images/88754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is visible on the jersey of the person in the foreground?" }, { "from": "gpt", "value": "The answer is The number on the jersey is 3" } ], "clip_score": 17.796875, "loss": 1.9610522985458374 }, { "id": "identity_214110", "image": "PMC-VQA/images/PMC7362635_Fig1_214567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which lobe of the brain is the abnormality located?" }, { "from": "gpt", "value": "The answer is The abnormality is located in the frontal lobe" } ], "clip_score": 31.46875, "loss": 1.9812335968017578 }, { "id": "identity_27803", "image": "VQA2.0/images/43664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do you need all these plates to serve one pizza?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.140625, "loss": 3.408292293548584 }, { "id": "identity_44567", "image": "FigureQA/images/45275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the color Peru occupy more than a quarter of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 37.8125, "loss": 3.0185909271240234 }, { "id": "identity_266677", "image": "DocVQA/images/fhng0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the delivery date?" }, { "from": "gpt", "value": "The answer is 11/10/97" } ], "clip_score": 18.25, "loss": 1.997368574142456 }, { "id": "identity_54992", "image": "FigureQA/images/29401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Orange Red section the largest in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 33.46875, "loss": 3.075338840484619 }, { "id": "identity_95277", "image": "TabMWP/images/4056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students collected more than 60 bottles?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 26.890625, "loss": 2.4678313732147217 }, { "id": "identity_121757", "image": "VQA2.0/images/222629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of parking meter is in the foreground of the image?" }, { "from": "gpt", "value": "The answer is A single-space electronic parking meter" } ], "clip_score": 31.859375, "loss": 2.580034017562866 }, { "id": "identity_59726", "image": "PlotQA/images/92113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph show a consistent increase in net taxes for all countries?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.421875, "loss": 3.0015106201171875 }, { "id": "identity_221945", "image": "A-OKVQA/images/527086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage container is on the table?" }, { "from": "gpt", "value": "The answer is Bottle" } ], "clip_score": 14.8671875, "loss": 2.195213556289673 }, { "id": "identity_231023", "image": "Super-CLEVR/images/superCLEVR_new_001950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the sizes of the yellow motorbike and the green car, and determine which one is larger. Additionally, identify the smallest object in the image and describe its color" }, { "from": "gpt", "value": "The answer is Yellow motorbike is larger; smallest object is gray with purple" } ], "clip_score": 28.8125, "loss": 3.2422211170196533 }, { "id": "identity_224174", "image": "DVQA/images/bar_train_00104379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'fate' algorithm across all datasets, and how does it compare to the average accuracy of the 'chest' algorithm across all datasets?" }, { "from": "gpt", "value": "The answer is Both averages are equal" } ], "clip_score": 34.4375, "loss": 4.099931240081787 }, { "id": "identity_298152", "image": "A-OKVQA/images/303439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what way are all these animals similar?\nChoices:\n(A) name\n(B) dogs\n(C) color\n(D) birds" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.5703125, "loss": 3.666856050491333 }, { "id": "identity_3681", "image": "TextVQA/images/a3ce7908f1edb464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what number is on the white t-shirt?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 12.6875, "loss": 2.2108991146087646 }, { "id": "identity_280461", "image": "AI2D/images/1867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is a decomposer in the food web? \n(A) Fox\n(B) Worm\n(C) Human\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.125, "loss": 3.529315233230591 }, { "id": "identity_32023", "image": "PMC-VQA/images/PMC8202118_f1_467793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body was imaged in the CT scan?\nChoices:\n(A) Brain \n(B) Chest \n(C) Abdomen \n(D) Legs " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5625, "loss": 3.988003969192505 }, { "id": "identity_149600", "image": "DocVQA/images/qsnc0227_69.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total Niacin consumption in milligrams for 17 persons according to the Recipe Method?" }, { "from": "gpt", "value": "The answer is 449.8 mg" } ], "clip_score": 20.671875, "loss": 1.906138300895691 }, { "id": "identity_212950", "image": "PMC-VQA/images/PMC7344590_diagnostics-10-00385-f001_207846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of midline shift in the image?" }, { "from": "gpt", "value": "The answer is No midline shift" } ], "clip_score": 20.828125, "loss": 3.2752394676208496 }, { "id": "identity_81178", "image": "UniGeo/images/2008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠BEC?" }, { "from": "gpt", "value": "The answer is 130°" } ], "clip_score": 33.78125, "loss": 2.288874864578247 }, { "id": "identity_325255", "image": "MapQA/images/map_11894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value in this region?" }, { "from": "gpt", "value": "The answer is 74-77" } ], "clip_score": 23.984375, "loss": 2.2899069786071777 }, { "id": "identity_18552", "image": "IconQA/images/69552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a square with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.4375, "loss": 4.001010894775391 }, { "id": "identity_231187", "image": "VQA2.0/images/484273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are in the picture, and how many of them are engaged in cooking activities?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.859375, "loss": 3.894735336303711 }, { "id": "identity_191690", "image": "TextVQA/images/97844cef7a4c8568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the product inside the packaging?" }, { "from": "gpt", "value": "The answer is Dongle-shaped" } ], "clip_score": 22.5625, "loss": 2.9506239891052246 }, { "id": "identity_332084", "image": "VQA2.0/images/555065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are these things parked?" }, { "from": "gpt", "value": "The answer is harbor" } ], "clip_score": 24.28125, "loss": 2.6607813835144043 }, { "id": "identity_166193", "image": "IconQA/images/43261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two dots are added to the second group, how many dots will be in that group?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.171875, "loss": 2.18841290473938 }, { "id": "identity_77083", "image": "TQA/images/question_images/earth_magnetic_field_6782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the line that divides the Earth into the Northern and Southern Hemispheres?" }, { "from": "gpt", "value": "The answer is Equator" } ], "clip_score": 30.6875, "loss": 2.686537504196167 }, { "id": "identity_336824", "image": "PMC-VQA/images/PMC7923829_Fig8_381636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the structure indicated by this level in the figure? \nChoices:\n(A) Medial malleolus of the tibia \n(B) Trochlear surface of the talus \n(C) Tarsometatarsal joint \n(D) Calcaneus tuberosity." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 3.9648513793945312 }, { "id": "identity_135336", "image": "DocVQA/images/jtjk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who signed the letter?" }, { "from": "gpt", "value": "The answer is David H. Kelsey" } ], "clip_score": 26.3125, "loss": 2.2319581508636475 }, { "id": "identity_174382", "image": "TabMWP/images/9385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did the number of employees with health care benefits see the largest increase from the previous year?" }, { "from": "gpt", "value": "The answer is 2013" } ], "clip_score": 39.96875, "loss": 1.7799830436706543 }, { "id": "identity_79851", "image": "Geometry3K/images/813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment LN when x = 2?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 30.921875, "loss": 2.638833999633789 }, { "id": "identity_28520", "image": "VQA2.0/images/364361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a wine show?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.03125, "loss": 3.206544876098633 }, { "id": "identity_329109", "image": "PMC-VQA/images/PMC5751172_ijms-18-02569-f010_259539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the cage is depicted in the image?\nChoices:\n(A) External surface \n(B) Top surface \n(C) Internal surface \n(D) Left surface" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.328125, "loss": 4.030161380767822 }, { "id": "identity_214061", "image": "VQA2.0/images/470762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the fire hydrant predominantly painted with?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 36.75, "loss": 3.7496752738952637 }, { "id": "identity_236449", "image": "AI2D/images/2177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long does it take for the larvae to become infective from the egg stage, and what stages do they pass through?" }, { "from": "gpt", "value": "The answer is 2-12 weeks, passing through 1st and 2nd stages" } ], "clip_score": 30.125, "loss": 2.009103775024414 }, { "id": "identity_247336", "image": "DocVQA/images/klvh0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration in seconds for the transmission mentioned in the report?" }, { "from": "gpt", "value": "The answer is 171" } ], "clip_score": 21.25, "loss": 3.072336435317993 }, { "id": "identity_281022", "image": "PlotQA/images/112051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the gap between the highest and second highest income shares held by the top 20% of the population?" }, { "from": "gpt", "value": "The answer is 0.7700000000000031" } ], "clip_score": 25.96875, "loss": 1.7318212985992432 }, { "id": "identity_213261", "image": "A-OKVQA/images/88477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cow predominantly?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 27.265625, "loss": 4.194851398468018 }, { "id": "identity_301810", "image": "TabMWP/images/22532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A stock broker followed the stock prices of a certain set of companies. Which company's stock was cheaper on Wednesday, White and Company or Computer Data Company?\nChoices:\n(A) Computer Data Company\n(B) White and Company" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.640625, "loss": 4.053513526916504 }, { "id": "identity_124737", "image": "A-OKVQA/images/111030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is the arrow on the blue sign pointing towards?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 17.921875, "loss": 3.139772891998291 }, { "id": "identity_80611", "image": "Geometry3K/images/1517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a square is inscribed in the circle, what is the side length of the square?" }, { "from": "gpt", "value": "The answer is 8.5 cm" } ], "clip_score": 30.515625, "loss": 2.202098846435547 }, { "id": "identity_212543", "image": "VizWiz/images/VizWiz_train_00005497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible droplets on the surface near the test?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.09375, "loss": 2.996014356613159 }, { "id": "identity_222773", "image": "A-OKVQA/images/57224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the skier's outfit on the right?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.546875, "loss": 3.1743435859680176 }, { "id": "identity_75862", "image": "TQA/images/abc_question_images/human_system_digestive_13720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label marks the tube that carries food from the mouth to the stomach?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.328125, "loss": 2.4272353649139404 }, { "id": "identity_188778", "image": "VQA-AS/images/5151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the plants' pots?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.203125, "loss": 3.2454237937927246 }, { "id": "identity_209215", "image": "PMC-VQA/images/PMC3423035_F1_150699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of hydrocephalus in this image?" }, { "from": "gpt", "value": "The answer is No evidence of hydrocephalus" } ], "clip_score": 31.9375, "loss": 2.586014986038208 }, { "id": "identity_15270", "image": "ScienceQA/images/806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these cities is marked on the map?\nChoices:\n(A) St. Louis\n(B) New Orleans\n(C) Houston\n(D) New York City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.390625, "loss": 4.053309917449951 }, { "id": "identity_84436", "image": "GeoQA+/images/4058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠EPD = 35°, what is the measure of ∠AED?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.46875, "loss": 1.6775319576263428 }, { "id": "identity_330232", "image": "IconQA/images/869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) blue\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.75, "loss": 3.967172145843506 }, { "id": "identity_310326", "image": "A-OKVQA/images/50355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red sign with a minus symbol on it typically indicate?\nChoices:\n(A) free parking\n(B) no parking\n(C) no entering\n(D) crossing permitted" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.8125, "loss": 4.4413042068481445 }, { "id": "identity_125632", "image": "PMC-VQA/images/PMC2894136_pntd-0000726-g005_67834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any markers or annotations on the image besides the arrows?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.734375, "loss": 3.00498628616333 }, { "id": "identity_164115", "image": "UniGeo/images/2142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,∠AOB=30°,点P为∠AOB内一点,OP=10,求∠APB的度数是多少?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 26.59375, "loss": 1.7977546453475952 }, { "id": "identity_146776", "image": "DocVQA/images/zjpc0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the person to whom the letter is primarily addressed?" }, { "from": "gpt", "value": "The answer is The Permanent Secretary" } ], "clip_score": 21.859375, "loss": 2.771702527999878 }, { "id": "identity_244382", "image": "A-OKVQA/images/545237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown on the clock on the tall building?" }, { "from": "gpt", "value": "The answer is 6:00" } ], "clip_score": 20.109375, "loss": 1.5345172882080078 }, { "id": "identity_172261", "image": "TabMWP/images/23577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of wooden beads per bag?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 37.78125, "loss": 2.1170847415924072 }, { "id": "identity_9066", "image": "A-OKVQA/images/441736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which person is in the greatest danger?\nChoices:\n(A) left man\n(B) right man\n(C) farthest man\n(D) rear man" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.7890625, "loss": 3.7094566822052 }, { "id": "identity_102425", "image": "IconQA/images/63392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more cheetahs facing left or right in the image?" }, { "from": "gpt", "value": "The answer is Equal number facing left and right" } ], "clip_score": 31.546875, "loss": 2.9709208011627197 }, { "id": "identity_116595", "image": "VQA2.0/images/351352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible cloud in the sky?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 15.3984375, "loss": 2.9587535858154297 }, { "id": "identity_207293", "image": "A-OKVQA/images/35711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.21875, "loss": 2.550320863723755 }, { "id": "identity_210002", "image": "A-OKVQA/images/195296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruits are displayed on the table besides bananas?" }, { "from": "gpt", "value": "The answer is Limes and unripe mangoes" } ], "clip_score": 29.5625, "loss": 2.3622117042541504 }, { "id": "identity_133726", "image": "VizWiz/images/VizWiz_train_00005901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any water droplets visible on the flowers?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.453125, "loss": 3.2388737201690674 }, { "id": "identity_148714", "image": "ChartQA/images/two_col_102588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the risk index score for the year 2016?" }, { "from": "gpt", "value": "The answer is 6.23" } ], "clip_score": 23.734375, "loss": 2.241339683532715 }, { "id": "identity_154640", "image": "ScienceQA/images/14592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the masses of the particles in Sample A and Sample B the same?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 33.625, "loss": 3.1900877952575684 }, { "id": "identity_23324", "image": "VQA2.0/images/384374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number train is this?" }, { "from": "gpt", "value": "The answer is 883" } ], "clip_score": 17.265625, "loss": 2.3153233528137207 }, { "id": "identity_38829", "image": "FigureQA/images/84747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the slope of Medium Orchid line positive or negative?" }, { "from": "gpt", "value": "The answer is Negative" } ], "clip_score": 20.46875, "loss": 2.9346961975097656 }, { "id": "identity_250906", "image": "DVQA/images/bar_train_00044662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the percentage of people who prefer the most preferred object and the least preferred object in the whole chart?" }, { "from": "gpt", "value": "The answer is 80%" } ], "clip_score": 33.375, "loss": 2.1318719387054443 }, { "id": "identity_336170", "image": "A-OKVQA/images/7819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What makes the pavement appear so shiny?\nChoices:\n(A) water\n(B) glass\n(C) ice\n(D) plastic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.578125, "loss": 4.636114597320557 }, { "id": "identity_113419", "image": "TextVQA/images/0bd60dc408159a20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the box?" }, { "from": "gpt", "value": "The answer is Open and slightly damaged" } ], "clip_score": 23.90625, "loss": 3.294682502746582 }, { "id": "identity_130845", "image": "TextVQA/images/6ba5823ac7be1bf9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What day and time is Adult Skate available?" }, { "from": "gpt", "value": "The answer is Tuesdays from 8-9pm" } ], "clip_score": 26.140625, "loss": 1.415481448173523 }, { "id": "identity_108291", "image": "A-OKVQA/images/230522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cow in the image?" }, { "from": "gpt", "value": "The answer is Black and white" } ], "clip_score": 28.546875, "loss": 2.9022254943847656 }, { "id": "identity_199020", "image": "PMC-VQA/images/PMC7060429_fig2_130174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of a medical condition in the image?" }, { "from": "gpt", "value": "The answer is Yes, possible fluid collection or mass" } ], "clip_score": 25.46875, "loss": 4.465154647827148 }, { "id": "identity_278076", "image": "TQA/images/question_images/human_system_respiratory_6198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe of the lung is situated between the superior lobe and the inferior lobe? Is it:\n(A) middle lobe\n(B) carina\n(C) diaphragm\n(D) trachea" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.3125, "loss": 4.482558250427246 }, { "id": "identity_253939", "image": "IconQA/images/57731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, what is the probability that both will be black?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.40625, "loss": 2.6830692291259766 }, { "id": "identity_215900", "image": "A-OKVQA/images/282279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a towel visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 16.984375, "loss": 3.574755907058716 }, { "id": "identity_294817", "image": "TabMWP/images/14609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on Latrell's lunch expenses recorded in the table, what was the difference in spending between Wednesday and Thursday?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 37.4375, "loss": 3.1003894805908203 }, { "id": "identity_7567", "image": "PMC-VQA/images/PMC5714954_Fig6_248498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is superimposed on the x-y slice along with the located particles? \nChoices:\n(A) a yellow line \n(B) a red arrow \n(C) a black \"+\" \n(D) a green circle " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.6875, "loss": 3.8291525840759277 }, { "id": "identity_78327", "image": "TQA/images/abc_question_images/parts_neuron_17225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter in the diagram indicates the synaptic vesicles?" }, { "from": "gpt", "value": "The answer is U" } ], "clip_score": 30.5, "loss": 3.7357513904571533 }, { "id": "identity_324758", "image": "TabMWP/images/1012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The teachers at a middle school counted how many computers they had in their classrooms. What is the most frequent number of computers?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.53125, "loss": 1.9854240417480469 }, { "id": "identity_226420", "image": "GeoQA+/images/3971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C在⊙O上,CO的延长线交AB于点D,BD=BO,∠A=50°,求∠C的度数。" }, { "from": "gpt", "value": "The answer is 15°" } ], "clip_score": 32.15625, "loss": 2.4031734466552734 }, { "id": "identity_164075", "image": "UniGeo/images/546.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the volume of the cone-shaped ice cream?" }, { "from": "gpt", "value": "The answer is 12π cm^3" } ], "clip_score": 31.0, "loss": 2.382953643798828 }, { "id": "identity_205836", "image": "PMC-VQA/images/PMC6398457_BIO036939F2_443775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar representing in the image?" }, { "from": "gpt", "value": "The answer is The scale bar represents a measurement, likely micrometers" } ], "clip_score": 13.296875, "loss": 2.588059902191162 }, { "id": "identity_106389", "image": "VQA-AS/images/5562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the blanket laid out on the ground?" }, { "from": "gpt", "value": "The answer is Checkered" } ], "clip_score": 21.0625, "loss": 2.439194440841675 }, { "id": "identity_314250", "image": "TQA/images/question_images/cellular_respiration_9043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the process between the Electron transport chain and Glycolysis?\nChoices:\n(A) Oxygen\n(B) Citric acid cycle\n(C) Glucose\n(D) Electrons transferred by NADH" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.96875, "loss": 3.9344475269317627 }, { "id": "identity_209002", "image": "VQA2.0/images/88274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tent structure above the market stalls?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.65625, "loss": 3.499985933303833 }, { "id": "identity_207710", "image": "A-OKVQA/images/569747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the graffiti on the signpost at the front?" }, { "from": "gpt", "value": "The answer is Blue and red" } ], "clip_score": 17.9375, "loss": 3.6211793422698975 }, { "id": "identity_51581", "image": "PlotQA/images/111473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the Y-axis?" }, { "from": "gpt", "value": "The answer is Population% (of Cohort)" } ], "clip_score": 21.28125, "loss": 3.6898367404937744 }, { "id": "identity_217101", "image": "A-OKVQA/images/204036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement on the back of the bus promoting?" }, { "from": "gpt", "value": "The answer is Ruby Mart" } ], "clip_score": 26.8125, "loss": 4.530488014221191 }, { "id": "identity_146710", "image": "DocVQA/images/hncc0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the petitioner?" }, { "from": "gpt", "value": "The answer is International Plaza, Englewood Cliffs, N.J. 07632" } ], "clip_score": 25.359375, "loss": 2.0632622241973877 }, { "id": "identity_4194", "image": "GeoQA+/images/2175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线l1∥l2,点A在直线l1上,以点A为圆心,适当的半径画弧,分别交直线l1、l2于点C、B两点,连接AC、BC,若∠ABC=58°,则∠1的大小为()\nChoices:\n(A) 54°\n(B) 58°\n(C) 64°\n(D) 68°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.265625, "loss": 2.751054048538208 }, { "id": "identity_267029", "image": "GeoQA+/images/1100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If an arc is formed by drawing arcs with a radius equal to the side length of an equilateral triangle with its vertices as centers, creating a closed figure called a \"cam,\" and the side length of the equilateral triangle is 2, what is the perimeter of the \"cam\"?\nChoices:\n(A) 2π\n(B) 4π\n(C) \\frac{4π}{3}\n(D) \\frac{2π}{3}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.6875, "loss": 2.817204236984253 }, { "id": "identity_322244", "image": "DocVQA/images/zqvw0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the alphanumeric sequence given at the bottom right?" }, { "from": "gpt", "value": "The answer is DWRITE 001877" } ], "clip_score": 19.109375, "loss": 3.9004197120666504 }, { "id": "identity_133863", "image": "VizWiz/images/VizWiz_train_00015019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand that produces this hand sanitizer?" }, { "from": "gpt", "value": "The answer is up&up" } ], "clip_score": 35.59375, "loss": 2.802752733230591 }, { "id": "identity_284687", "image": "A-OKVQA/images/340440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the room being so small?\nChoices:\n(A) temporary arrangement\n(B) college dorm\n(C) small tenants\n(D) low rent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.421875, "loss": 4.515083312988281 }, { "id": "identity_47286", "image": "DocVQA/images/hqvf0065_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the building and room number for the conference on 12/18?" }, { "from": "gpt", "value": "The answer is Bldg. 102 Room: A 102" } ], "clip_score": 20.96875, "loss": 2.5384416580200195 }, { "id": "identity_279437", "image": "TextVQA/images/02748cc67037e249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the name of the store?" }, { "from": "gpt", "value": "The answer is brach's" } ], "clip_score": 26.734375, "loss": 2.5963001251220703 }, { "id": "identity_246394", "image": "A-OKVQA/images/248767.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, identify the zone and provide three distinct features that support your identification" }, { "from": "gpt", "value": "The answer is Tourist zone with hotel, pedestrian traffic, and urban environment" } ], "clip_score": 22.75, "loss": 2.9622721672058105 }, { "id": "identity_161262", "image": "GeoQA+/images/620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle CBA is 40 degrees, what is the length of arc AC?" }, { "from": "gpt", "value": "The answer is 80 degrees" } ], "clip_score": 34.6875, "loss": 2.188455581665039 }, { "id": "identity_46093", "image": "MapQA/images/map_5044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state with the lowest percentage in the South?" }, { "from": "gpt", "value": "The answer is 56.3%-63.7%" } ], "clip_score": 27.640625, "loss": 1.7391527891159058 }, { "id": "identity_124134", "image": "PMC-VQA/images/PMC5102410_pone.0165239.g001_93702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to produce this image?" }, { "from": "gpt", "value": "The answer is Magnetic Resonance Imaging (MRI)" } ], "clip_score": 27.765625, "loss": 1.0192478895187378 }, { "id": "identity_52481", "image": "FigureQA/images/20950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Hot Pink greater than Light Sky Blue?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.640625, "loss": 3.4705002307891846 }, { "id": "identity_30404", "image": "A-OKVQA/images/3602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who should a crime be reported to?\nChoices:\n(A) pedestrian\n(B) police division\n(C) motorcyclist\n(D) car driver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.921875, "loss": 4.4142303466796875 }, { "id": "identity_125791", "image": "TextVQA/images/305fd8c750a8fe94.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the four-letter word written in red, starting with \"F\" near the bottom right corner?" }, { "from": "gpt", "value": "The answer is FREE" } ], "clip_score": 10.421875, "loss": 2.945131540298462 }, { "id": "identity_273537", "image": "DVQA/images/bar_train_00015245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of units sold by the worst selling item in the entire chart?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 31.296875, "loss": 2.371851921081543 }, { "id": "identity_111217", "image": "PMC-VQA/images/PMC6518670_Fig2_480121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the images arranged in chronological order of a procedure?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.34375, "loss": 2.7190730571746826 }, { "id": "identity_69148", "image": "ScienceQA/images/14398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the individuals indoors or outdoors?" }, { "from": "gpt", "value": "The answer is Indoors" } ], "clip_score": 16.015625, "loss": 2.71138334274292 }, { "id": "identity_98277", "image": "IconQA/images/40886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line were extended to the 7 centimeter mark, by how many centimeters would it be increased?" }, { "from": "gpt", "value": "The answer is 1.1 centimeters" } ], "clip_score": 29.609375, "loss": 1.882286787033081 }, { "id": "identity_178603", "image": "TabMWP/images/24630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much would it cost to buy one of each item listed?" }, { "from": "gpt", "value": "The answer is $1.31" } ], "clip_score": 29.515625, "loss": 1.9494959115982056 }, { "id": "identity_145215", "image": "DocVQA/images/qrgf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of this letter?" }, { "from": "gpt", "value": "The answer is Esther F. Philpand" } ], "clip_score": 25.15625, "loss": 4.103085041046143 }, { "id": "identity_184650", "image": "PMC-VQA/images/PMC6206379_Fig4_387477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different symbols are used to mark specific features in this histological image?" }, { "from": "gpt", "value": "The answer is Four symbols" } ], "clip_score": 26.75, "loss": 4.309664726257324 }, { "id": "identity_273881", "image": "TQA/images/question_images/stratigraphy_9265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the layer in the diagram that is the oldest.\nChoices:\n(A) L\n(B) E\n(C) A\n(D) G" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.9375, "loss": 3.866403102874756 }, { "id": "identity_337129", "image": "DVQA/images/bar_train_00067600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars each a single solid color without any patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.796875, "loss": 3.559251308441162 }, { "id": "identity_297043", "image": "UniGeo/images/1707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, if angle A is 66° and point I is the incenter, what is the measure of angle BIC?\nChoices:\n(A) 114°\n(B) 122°\n(C) 123°\n(D) 132°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.6875, "loss": 2.6375575065612793 }, { "id": "identity_64943", "image": "ScienceQA/images/17338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the track suit?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.28125, "loss": 3.801176071166992 }, { "id": "identity_168606", "image": "IconQA/images/81674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares wide is the letter before the transformation?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.859375, "loss": 2.470189094543457 }, { "id": "identity_67453", "image": "AI2D/images/1014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the flower was formerly protecting the flower bud?" }, { "from": "gpt", "value": "The answer is Sepals" } ], "clip_score": 26.625, "loss": 3.512756109237671 }, { "id": "identity_37456", "image": "TabMWP/images/18021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After a scavenger hunt, a team counted the number of pine cones found by each of its members. How many people are there in all?" }, { "from": "gpt", "value": "The answer is 47" } ], "clip_score": 34.59375, "loss": 2.8629205226898193 }, { "id": "identity_4318", "image": "FigureQA/images/18372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta less than Web Maroon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.796875, "loss": 3.9377188682556152 }, { "id": "identity_133649", "image": "Super-CLEVR/images/superCLEVR_new_000931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.828125, "loss": 2.9125521183013916 }, { "id": "identity_144937", "image": "FigureQA/images/73471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Hot Pink on the y-axis?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.3125, "loss": 3.762147903442383 }, { "id": "identity_222452", "image": "A-OKVQA/images/552446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the stadium indoor or outdoor?" }, { "from": "gpt", "value": "The answer is Outdoor" } ], "clip_score": 20.125, "loss": 2.7547638416290283 }, { "id": "identity_335059", "image": "VQA-AS/images/8540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it appropriate for them to wear long sleeves on a sunny day?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.796875, "loss": 2.991332530975342 }, { "id": "identity_8994", "image": "GeoQA+/images/77.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB∥CD,G为直线CD上一点,E为直线AB与直线CD之间一点,F为直线AB上方一点,∠F=40°,∠EGF=65°,则∠1+∠2=()\nChoices:\n(A) 40°\n(B) 55°\n(C) 65°\n(D) 75°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.5, "loss": 2.808250904083252 }, { "id": "identity_172438", "image": "IconQA/images/79194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the given shape be classified as a geometric solid?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.328125, "loss": 3.4432976245880127 }, { "id": "identity_271369", "image": "TabMWP/images/22848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many team members scored a minimum of 1 goal last season, as the soccer coach reviews their performance for the upcoming season?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 35.78125, "loss": 2.967268228530884 }, { "id": "identity_254124", "image": "AI2D/images/4716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the image that represents both \"ligule\" and \"auriculate\" leaf attachments" }, { "from": "gpt", "value": "The answer is none of above" } ], "clip_score": 33.0625, "loss": 3.768610715866089 }, { "id": "identity_10510", "image": "Super-CLEVR/images/superCLEVR_new_001921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the metallic object in front of the brown aeroplane?" }, { "from": "gpt", "value": "The answer is road bike" } ], "clip_score": 25.71875, "loss": 3.9465172290802 }, { "id": "identity_8675", "image": "VQA2.0/images/10069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the wearing stilettos?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.375, "loss": 3.5152697563171387 }, { "id": "identity_74230", "image": "AI2D/images/3316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organelle is labeled as \"Peroxisome\" and what is its primary function?" }, { "from": "gpt", "value": "The answer is Peroxisome; breaks down fatty acids and detoxifies" } ], "clip_score": 26.140625, "loss": 2.5156755447387695 }, { "id": "identity_244258", "image": "TabMWP/images/24263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Winston missed the 9:30 A.M. train at Ashland and needs to reach Dayton as soon as possible. How long will it take him to reach Dayton if he takes the next available train from Ashland?" }, { "from": "gpt", "value": "The answer is 3 hours 15 minutes" } ], "clip_score": 28.765625, "loss": 1.9488365650177002 }, { "id": "identity_2487", "image": "VQA-AS/images/16935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is behind the couch?" }, { "from": "gpt", "value": "The answer is coat rack" } ], "clip_score": 22.515625, "loss": 3.517774820327759 }, { "id": "identity_282295", "image": "PMC-VQA/images/PMC6124583_f2_363748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specific location of the ulcerated lesion among the given choices? \n(A) Body of stomach \n(B) Fundus of stomach \n(C) Incisura angularis \n(D) Greater curvature of stomach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.484375, "loss": 4.379700183868408 }, { "id": "identity_183626", "image": "PMC-VQA/images/PMC5671704_fig1_235707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see the ventricles of the brain in this image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.671875, "loss": 2.948437213897705 }, { "id": "identity_205437", "image": "TextVQA/images/488435f598c4e473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years of history does the book claim to cover?" }, { "from": "gpt", "value": "The answer is 3000 years" } ], "clip_score": 17.703125, "loss": 1.6984134912490845 }, { "id": "identity_35525", "image": "DocVQA/images/ztlb0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is consultants on toxaphene" } ], "clip_score": 21.890625, "loss": 3.9146087169647217 }, { "id": "identity_241981", "image": "FigureQA/images/67565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much is Dark Gold greater than Seafoam at x = 50?" }, { "from": "gpt", "value": "The answer is 0.3" } ], "clip_score": 30.375, "loss": 1.770438313484192 }, { "id": "identity_66615", "image": "AI2D/images/2728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phase of the moon is depicted at the top right of the diagram?" }, { "from": "gpt", "value": "The answer is Waxing crescent" } ], "clip_score": 35.09375, "loss": 2.4505720138549805 }, { "id": "identity_171409", "image": "IconQA/images/98246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares tall is the letter 'J'?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.8125, "loss": 2.650657892227173 }, { "id": "identity_126640", "image": "PMC-VQA/images/PMC6720874_sensors-19-03522-f014_31921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors are predominantly visible on the object?" }, { "from": "gpt", "value": "The answer is Red, green, and blue" } ], "clip_score": 26.015625, "loss": 3.0089271068573 }, { "id": "identity_36871", "image": "PMC-VQA/images/PMC7923829_Fig8_381636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the structure indicated by level 6 in the figure? \nChoices:\n(A) Medial malleolus of the tibia \n(B) Trochlear surface of the talus \n(C) Tarsometatarsal joint \n(D) Calcaneus tuberosity. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.28125, "loss": 3.9985511302948 }, { "id": "identity_286709", "image": "TabMWP/images/32384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each branch has 2 leaves, how many leaves are there in total on 4 branches?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 28.015625, "loss": 1.9443022012710571 }, { "id": "identity_280407", "image": "PMC-VQA/images/PMC4937445_f6_49207.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of images were captured for the microstructural analysis? \nOptions:\n(A) Transmission electron images\n(B) X-ray images\n(C) Secondary electron images\n(D) Scanning helium ion images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.71875, "loss": 3.810593366622925 }, { "id": "identity_72863", "image": "ScienceQA/images/11945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ultimate source of energy for the organisms in this food web?" }, { "from": "gpt", "value": "The answer is The sun" } ], "clip_score": 28.828125, "loss": 3.4515819549560547 }, { "id": "identity_176985", "image": "IconQA/images/22202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hours apart did Betty and Steve go to the park?" }, { "from": "gpt", "value": "The answer is 1 hour" } ], "clip_score": 24.84375, "loss": 1.6111905574798584 }, { "id": "identity_25562", "image": "PMC-VQA/images/PMC3564814_F1_183850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in the image? \nChoices:\n(A) The upper pole of the left kidney \n(B) The lower pole of the right kidney \n(C) The middle pole of the left kidney \n(D) The middle pole of the right kidney " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.171875, "loss": 4.135663032531738 }, { "id": "identity_211518", "image": "A-OKVQA/images/556278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the traffic lights facing the camera?" }, { "from": "gpt", "value": "The answer is Red and Green" } ], "clip_score": 28.8125, "loss": 2.848268508911133 }, { "id": "identity_68767", "image": "ScienceQA/images/9816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the smaller country located southeast of the highlighted country?" }, { "from": "gpt", "value": "The answer is New Zealand" } ], "clip_score": 29.65625, "loss": 2.502824068069458 }, { "id": "identity_58364", "image": "MapQA/images/map_2067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Texas?" }, { "from": "gpt", "value": "The answer is 63.4%-66.3%" } ], "clip_score": 23.046875, "loss": 2.183467149734497 }, { "id": "identity_29954", "image": "TabMWP/images/17454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Chase buys 1.6 pounds of maple syrup. How much does he spend?" }, { "from": "gpt", "value": "The answer is 4.80" } ], "clip_score": 29.46875, "loss": 1.907548427581787 }, { "id": "identity_215858", "image": "PMC-VQA/images/PMC6206383_Fig24_387536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any abnormalities observed in the ventricular system of the brain in this image?" }, { "from": "gpt", "value": "The answer is No abnormalities observed" } ], "clip_score": 31.71875, "loss": 2.8802709579467773 }, { "id": "identity_5634", "image": "UniGeo/images/796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,CD是斜边AB上的中线,已知CD=2,AC=3,则sinB的值是()\nChoices:\n(A) \\frac{2}{3}\n(B) \\frac{3}{2}\n(C) \\frac{3}{4}\n(D) \\frac{4}{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.3125, "loss": 2.9040346145629883 }, { "id": "identity_274945", "image": "VizWiz/images/VizWiz_train_00019177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please assist me by reading this label? Thank you." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.0, "loss": 5.056337833404541 }, { "id": "identity_184261", "image": "PMC-VQA/images/PMC3552813_f4_180745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is outlined by the blue area at the top of the image?" }, { "from": "gpt", "value": "The answer is Epithelial tissue lining" } ], "clip_score": 28.875, "loss": 3.1189234256744385 }, { "id": "identity_286057", "image": "PMC-VQA/images/PMC5804657_Fig2_273284.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the green arrow in the CT scan in relation to the following choices: (A) Clavicle, (B) First rib, (C) Anterior scalene muscle, or (D) None of the above?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.265625, "loss": 3.546473264694214 }, { "id": "identity_86066", "image": "UniGeo/images/1850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知点M是线段AB的中点,N是线段AM上的点,且满足AN:MN=1:2,若线段MN=6cm,求线段BM的长度为多少厘米?" }, { "from": "gpt", "value": "The answer is 8cm" } ], "clip_score": 24.90625, "loss": 2.347257375717163 }, { "id": "identity_187043", "image": "PMC-VQA/images/PMC7878549_F2_366617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical imaging technique used to produce this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.8125, "loss": 2.28411602973938 }, { "id": "identity_160417", "image": "GeoQA+/images/2121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle AOB?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 31.578125, "loss": 1.7058460712432861 }, { "id": "identity_218711", "image": "VQA-AS/images/7975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the sun in the sky?" }, { "from": "gpt", "value": "The answer is To the left of the scene" } ], "clip_score": 24.625, "loss": 3.2867794036865234 }, { "id": "identity_16462", "image": "GeoQA+/images/58.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 某铁路路基的横断面是一个等腰梯形(如图),若腰的坡比为2:3,路基顶宽3米,高4米,则路基的下底宽为()\nChoices:\n(A) 7m\n(B) 9m\n(C) 12m\n(D) 15m" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.0, "loss": 2.462838888168335 }, { "id": "identity_197593", "image": "PMC-VQA/images/PMC6362127_Fig4_432139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is primarily visible in the images?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 26.078125, "loss": 3.3876357078552246 }, { "id": "identity_259995", "image": "ScienceQA/images/10490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image and list two other states that border this state" }, { "from": "gpt", "value": "The answer is Montpelier, New York, New Hampshire" } ], "clip_score": 28.0, "loss": 2.5500035285949707 }, { "id": "identity_41905", "image": "DVQA/images/bar_train_00121178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest preference for the 'event' category?" }, { "from": "gpt", "value": "The answer is amount" } ], "clip_score": 29.140625, "loss": 4.078606128692627 }, { "id": "identity_130043", "image": "VQA2.0/images/513424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What message is written on the speech bubble?" }, { "from": "gpt", "value": "The answer is Don't eat us, we're music!" } ], "clip_score": 28.625, "loss": 1.136620283126831 }, { "id": "identity_272596", "image": "A-OKVQA/images/579938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of art being depicted:\nOptions:\n(A) sand\n(B) painting\n(C) drawing\n(D) sculpture" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.140625, "loss": 4.307486534118652 }, { "id": "identity_179416", "image": "TabMWP/images/4341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you have a budget of $20, how many pounds of puffed rice can you buy?" }, { "from": "gpt", "value": "The answer is 5 pounds" } ], "clip_score": 30.9375, "loss": 2.4225082397460938 }, { "id": "identity_270437", "image": "ScienceQA/images/15129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Virginia?\nChoices:\n(A) Austin\n(B) Oklahoma City\n(C) Arlington\n(D) Richmond" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.265625, "loss": 3.844886302947998 }, { "id": "identity_164372", "image": "GeoQA+/images/5454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of the semicircle and D is the midpoint of arc AC, what is the measure of angle ADC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 33.40625, "loss": 1.5349417924880981 }, { "id": "identity_332771", "image": "MapQA/images/map_1595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any data missing on the map?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.578125, "loss": 2.6129848957061768 }, { "id": "identity_193624", "image": "PMC-VQA/images/PMC7082865_fig3-2324709620912101_138516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of fluid accumulation in the lungs?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.625, "loss": 3.0205655097961426 }, { "id": "identity_263990", "image": "TabMWP/images/8501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If some teachers compared the number of years they have been teaching, what is the average of those numbers?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 30.765625, "loss": 2.4898180961608887 }, { "id": "identity_217505", "image": "VQA2.0/images/502916.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of shot is the player attempting to execute?" }, { "from": "gpt", "value": "The answer is An overhead shot or serve" } ], "clip_score": 23.296875, "loss": 3.240286111831665 }, { "id": "identity_262220", "image": "PMC-VQA/images/PMC7325026_Fig2_201874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the correct view displayed for the Munchkin cat? \nChoices:\n(A) Anterior view\n(B) Ventral view\n(C) Lateral view\n(D) Dorsal view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.1875, "loss": 5.200084686279297 }, { "id": "identity_74080", "image": "AI2D/images/2675.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phenomenon is being illustrated by the diagram?" }, { "from": "gpt", "value": "The answer is Earth's orbit and seasons" } ], "clip_score": 32.90625, "loss": 2.8153204917907715 }, { "id": "identity_104755", "image": "VQA-AS/images/9854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is shown?" }, { "from": "gpt", "value": "The answer is Wood" } ], "clip_score": 23.0, "loss": 3.3709468841552734 }, { "id": "identity_33849", "image": "ScienceQA/images/18289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which statement describes the Tallgrass Prairie National Preserve ecosystem?\nChoices:\n(A) It has hot summers and cool winters.\n(B) It has soil that is poor in nutrients." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.109375, "loss": 4.332467555999756 }, { "id": "identity_62734", "image": "ScienceQA/images/4959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bond is shown connecting the two atoms?" }, { "from": "gpt", "value": "The answer is Single covalent bond" } ], "clip_score": 23.78125, "loss": 2.219935894012451 }, { "id": "identity_78964", "image": "Geometry3K/images/1458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side y in the triangle?" }, { "from": "gpt", "value": "The answer is 7√3" } ], "clip_score": 35.21875, "loss": 3.0308780670166016 }, { "id": "identity_319248", "image": "TextVQA/images/7d15b68bbae4c880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of this route?" }, { "from": "gpt", "value": "The answer is 66" } ], "clip_score": 21.46875, "loss": 2.204806089401245 }, { "id": "identity_139945", "image": "DocVQA/images/yjpg0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the representative from the Public Health Service?" }, { "from": "gpt", "value": "The answer is Dr. W. H. Sebrell" } ], "clip_score": 26.65625, "loss": 2.8523738384246826 }, { "id": "identity_62304", "image": "DocVQA/images/ykdv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the amount of the monthly honorarium?" }, { "from": "gpt", "value": "The answer is 100.00" } ], "clip_score": 23.90625, "loss": 1.5932464599609375 }, { "id": "identity_234768", "image": "DVQA/images/bar_train_00112715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total accuracy for the 'shape' category across all datasets?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 30.21875, "loss": 2.6336004734039307 }, { "id": "identity_12395", "image": "IconQA/images/41267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the dotted line a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.390625, "loss": 3.7285945415496826 }, { "id": "identity_225476", "image": "DocVQA/images/yscw0217_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount that can be invoiced upon submission of the paper to the target journal, and what is the total payment if the project is terminated after the first draft?" }, { "from": "gpt", "value": "The answer is $1,000 upon submission, $1,000 if terminated after first draft" } ], "clip_score": 21.453125, "loss": 1.6237887144088745 }, { "id": "identity_195691", "image": "A-OKVQA/images/332877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pedestrians visible on the sidewalk?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.265625, "loss": 3.136216640472412 }, { "id": "identity_65310", "image": "TQA/images/question_images/human_system_respiratory_6203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the diaphragm in the respiratory system?" }, { "from": "gpt", "value": "The answer is It aids in lung expansion during breathing" } ], "clip_score": 31.890625, "loss": 2.9049112796783447 }, { "id": "identity_166975", "image": "TabMWP/images/12750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of Paul's expenses for December?" }, { "from": "gpt", "value": "The answer is $70" } ], "clip_score": 32.0625, "loss": 2.439629077911377 }, { "id": "identity_228506", "image": "TQA/images/question_images/cycle_nitrogen_6708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do Fossil Fuel Emissions contribute to the nitrogen cycle, and what is the final form of nitrogen that results from this process?" }, { "from": "gpt", "value": "The answer is Nitrates (NO3-)" } ], "clip_score": 32.03125, "loss": 2.1122426986694336 }, { "id": "identity_188828", "image": "A-OKVQA/images/133061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure are the cows standing under?" }, { "from": "gpt", "value": "The answer is A shelter or shed" } ], "clip_score": 27.359375, "loss": 3.806302309036255 }, { "id": "identity_167195", "image": "IconQA/images/67879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the spinner is not pink?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 30.21875, "loss": 1.650686502456665 }, { "id": "identity_47525", "image": "DVQA/images/bar_train_00136806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of people who prefer the objects listed in the chart?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 31.078125, "loss": 2.680312156677246 }, { "id": "identity_183639", "image": "TextVQA/images/62579859904492f0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full phrase written on the white banner?" }, { "from": "gpt", "value": "The answer is AMERICA Land of the free and of the brave" } ], "clip_score": 25.15625, "loss": 2.2118589878082275 }, { "id": "identity_338330", "image": "Geometry3K/images/1169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 4 \\sqrt { 29 }\n(B) 25\n(C) 33\n(D) \\sqrt { 1714 }" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.25, "loss": 2.934262752532959 }, { "id": "identity_5262", "image": "VQA2.0/images/339670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a lobby?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.453125, "loss": 3.592395782470703 }, { "id": "identity_273456", "image": "ChartQA/images/multi_col_60710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the blue and dark blue bars in the Netherlands combined?" }, { "from": "gpt", "value": "The answer is 22.81" } ], "clip_score": 23.046875, "loss": 2.1928019523620605 }, { "id": "identity_312882", "image": "DocVQA/images/gpyw0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When does the registration begin?" }, { "from": "gpt", "value": "The answer is 8.00 am" } ], "clip_score": 18.265625, "loss": 1.937657117843628 }, { "id": "identity_243083", "image": "Super-CLEVR/images/superCLEVR_new_000248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on all the vehicles combined in the image?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 21.984375, "loss": 1.9895023107528687 }, { "id": "identity_326543", "image": "IconQA/images/18680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 85" } ], "clip_score": 22.015625, "loss": 2.3572561740875244 }, { "id": "identity_18621", "image": "VizWiz/images/VizWiz_train_00016449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is spoon" } ], "clip_score": 30.53125, "loss": 2.382577657699585 }, { "id": "identity_305578", "image": "VQA-RAD/images/synpic24390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How wide is this compared to the thorax?" }, { "from": "gpt", "value": "The answer is It is less than half the width of the thorax" } ], "clip_score": 32.25, "loss": 2.096752405166626 }, { "id": "identity_214547", "image": "TextVQA/images/0166adf82f1557c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text visible on the train besides the letters \"LMS\"?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.484375, "loss": 2.66859769821167 }, { "id": "identity_258485", "image": "TabMWP/images/28713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of stickers in the collections?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 27.125, "loss": 2.344724416732788 }, { "id": "identity_140488", "image": "DocVQA/images/flwk0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the philosophy endorsed by the U.S. Surgeon General and the public health community mentioned in the text?" }, { "from": "gpt", "value": "The answer is Less ought to be better" } ], "clip_score": 23.375, "loss": 4.938222885131836 }, { "id": "identity_159651", "image": "GeoQA+/images/3898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the rectangle ABCD is cut along the diagonal AC, what is the area of each of the two resulting triangles?" }, { "from": "gpt", "value": "The answer is 96 square centimeters" } ], "clip_score": 32.1875, "loss": 2.393167495727539 }, { "id": "identity_27859", "image": "FigureQA/images/31838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.328125, "loss": 4.410264015197754 }, { "id": "identity_180243", "image": "CLEVR-Math/images/CLEVR_train_056035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red objects are present?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.796875, "loss": 2.2827091217041016 }, { "id": "identity_99170", "image": "IconQA/images/15970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock is 30 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 7:00" } ], "clip_score": 31.265625, "loss": 1.7248282432556152 }, { "id": "identity_326459", "image": "PMC-VQA/images/PMC3961443_f1-ol-07-04-1270_275948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the hypodense mass located?\nChoices:\n(A) left anterior bladder wall\n(B) right anterior bladder wall\n(C) posterior bladder wall\n(D) bladder base wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.0, "loss": 4.5390543937683105 }, { "id": "identity_87769", "image": "UniGeo/images/2533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the side length of the larger square ABCD is 12 and BF is 3, what is the length of segment CD?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 36.71875, "loss": 2.1782379150390625 }, { "id": "identity_111347", "image": "TextVQA/images/03577bee21c7d19e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many microphones are visible on the podium?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.75, "loss": 4.224025726318359 }, { "id": "identity_108999", "image": "TextVQA/images/220c90884cded523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of store is located on the corner of the street?" }, { "from": "gpt", "value": "The answer is Grocery Store" } ], "clip_score": 20.265625, "loss": 2.2470862865448 }, { "id": "identity_275204", "image": "IconQA/images/101790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Estimate the length of the line using dice. The line is approximately (_) dice long." }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.09375, "loss": 2.5484673976898193 }, { "id": "identity_227823", "image": "AI2D/images/121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relationship between the forces acting on the Earth and Moon in the diagram, and how do these forces affect their respective positions and movements?" }, { "from": "gpt", "value": "The answer is Forces balance to maintain stable Earth-Moon orbit" } ], "clip_score": 33.5, "loss": 4.13592529296875 }, { "id": "identity_290993", "image": "FigureQA/images/50646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Violet the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.203125, "loss": 4.468071937561035 }, { "id": "identity_327388", "image": "VQA2.0/images/343215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a promotion?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 11.515625, "loss": 3.2655844688415527 }, { "id": "identity_309524", "image": "DocVQA/images/tjxn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When did this get issued?" }, { "from": "gpt", "value": "The answer is thu, 25 may, 2006" } ], "clip_score": 19.453125, "loss": 2.526521921157837 }, { "id": "identity_257184", "image": "ScienceQA/images/15203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image and then determine the distance in miles from this capital to the capital of the neighboring state to the west" }, { "from": "gpt", "value": "The answer is 120 miles" } ], "clip_score": 27.640625, "loss": 2.779557466506958 }, { "id": "identity_19199", "image": "TabMWP/images/22595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A museum curator counted the number of paintings in each exhibit at the art museum. What is the smallest number of paintings?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 30.40625, "loss": 3.261110782623291 }, { "id": "identity_64101", "image": "TQA/images/question_images/cycle_nitrogen_6714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What could potentially interrupt the nitrogen cycle as suggested by the image?" }, { "from": "gpt", "value": "The answer is Antibiotics" } ], "clip_score": 26.234375, "loss": 2.1054556369781494 }, { "id": "identity_29115", "image": "Geometry3K/images/2073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Assume that any segment that appears to be tangent is tangent.\nChoices:\n(A) 9\n(B) 45\n(C) 60\n(D) 105" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.078125, "loss": 3.0435562133789062 }, { "id": "identity_279244", "image": "UniGeo/images/1159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, circle O is the circumcircle of triangle ABC, with angle A measuring 50°. What is the measure of angle OCB?\nChoices:\n(A) 60°\n(B) 50°\n(C) 40°\n(D) 30°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.90625, "loss": 2.935295820236206 }, { "id": "identity_164577", "image": "Geometry3K/images/224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the arc JL?" }, { "from": "gpt", "value": "The answer is 13.5 inches" } ], "clip_score": 31.65625, "loss": 2.079155445098877 }, { "id": "identity_220050", "image": "PMC-VQA/images/PMC5909365_Fig5_304664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is located directly below the white arrow?" }, { "from": "gpt", "value": "The answer is Small intestine" } ], "clip_score": 24.921875, "loss": 3.054497718811035 }, { "id": "identity_234399", "image": "ScienceQA/images/243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and list its capital city and the two countries closest to it geographically" }, { "from": "gpt", "value": "The answer is Jamaica, Kingston, Cuba, Haiti" } ], "clip_score": 33.5625, "loss": 2.013183355331421 }, { "id": "identity_208521", "image": "TextVQA/images/00406d1a5405c5a6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirts worn by the women?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 19.3125, "loss": 3.0117931365966797 }, { "id": "identity_48711", "image": "DocVQA/images/qtkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the institution associated with the author?" }, { "from": "gpt", "value": "The answer is Oregon Agricultural Experiment Station, Oregon State College, Corvallis, Oregon" } ], "clip_score": 24.09375, "loss": 1.2419615983963013 }, { "id": "identity_336486", "image": "A-OKVQA/images/441379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number on the outfit worn by the animals?\nChoices:\n(A) group number\n(B) age\n(C) bib number\n(D) height" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.4609375, "loss": 4.110687732696533 }, { "id": "identity_317209", "image": "PMC-VQA/images/PMC8199119_jcm-10-02421-f003_466488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image depicts mild Descemet's membrane detachment?\nChoices:\n(A) Image A \n(B) Image B \n(C) Image C \n(D) Image D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.125, "loss": 3.5708160400390625 }, { "id": "identity_190851", "image": "PMC-VQA/images/PMC4557590_f3-cln_70p654_420029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the midline of the brain be clearly seen in this image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.125, "loss": 2.9681715965270996 }, { "id": "identity_43388", "image": "DVQA/images/bar_train_00010292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is preferred by the least percentage of people according to the graph?" }, { "from": "gpt", "value": "The answer is grade" } ], "clip_score": 32.34375, "loss": 3.1397693157196045 }, { "id": "identity_79725", "image": "Geometry3K/images/883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radius of circle S?" }, { "from": "gpt", "value": "The answer is The radius of circle S is 4" } ], "clip_score": 34.90625, "loss": 1.9209154844284058 }, { "id": "identity_286541", "image": "ScienceQA/images/11702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Virginia?\nChoices:\n(A) Concord\n(B) Austin\n(C) Richmond\n(D) Arlington" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.140625, "loss": 4.140742301940918 }, { "id": "identity_338314", "image": "PlotQA/images/41701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is Gaseous fuel" } ], "clip_score": 15.7890625, "loss": 3.7999656200408936 }, { "id": "identity_44891", "image": "DocVQA/images/rhlg0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many members are listed in the Advisory Committee on Metabolism of Trauma?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.34375, "loss": 2.4295132160186768 }, { "id": "identity_15581", "image": "TabMWP/images/20776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each branch has 5 butterflies. How many butterflies are on 3 branches?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 32.875, "loss": 1.8199806213378906 }, { "id": "identity_98958", "image": "IconQA/images/45852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the squares in the second row were removed, how many squares would remain?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.015625, "loss": 2.325515031814575 }, { "id": "identity_44267", "image": "FigureQA/images/87005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Purple have a higher value than Seafoam at x-axis value 60?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.6875, "loss": 3.2349464893341064 }, { "id": "identity_95461", "image": "TabMWP/images/17331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which event overlaps with the Spanish club meeting?" }, { "from": "gpt", "value": "The answer is Football game" } ], "clip_score": 31.265625, "loss": 3.108618974685669 }, { "id": "identity_116417", "image": "PMC-VQA/images/PMC4686709_fig1_455026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the lesion indicated by the black arrow?" }, { "from": "gpt", "value": "The answer is Increased radiodensity compared to lung tissue" } ], "clip_score": 30.078125, "loss": 2.1152114868164062 }, { "id": "identity_304766", "image": "FigureQA/images/77182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Dark Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.453125, "loss": 3.735119342803955 }, { "id": "identity_324286", "image": "IconQA/images/38349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.625, "loss": 4.031132698059082 }, { "id": "identity_182940", "image": "PMC-VQA/images/PMC8121193_FIG2_440377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of pleural effusion in this image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.734375, "loss": 2.844132661819458 }, { "id": "identity_7116", "image": "DVQA/images/bar_train_00047060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm luxury in the dataset day?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 34.8125, "loss": 2.9469311237335205 }, { "id": "identity_281208", "image": "ChartQA/images/multi_col_1240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage represented by the leftmost bar?" }, { "from": "gpt", "value": "The answer is 62" } ], "clip_score": 22.109375, "loss": 3.234680414199829 }, { "id": "identity_66531", "image": "TQA/images/abc_question_images/parts_worm_17321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'P' represent in the diagram?" }, { "from": "gpt", "value": "The answer is Anterior end or head" } ], "clip_score": 23.09375, "loss": 4.19807243347168 }, { "id": "identity_1057", "image": "AI2D/images/1912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, how does the energy flow\nChoices:\n(A) from insect to fungi\n(B) from fungi to insect\n(C) from bacteria to fish\n(D) from fish to algae" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.59375, "loss": 3.1477603912353516 }, { "id": "identity_114577", "image": "PMC-VQA/images/PMC5703880_Fig8_244525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color seen in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.34375, "loss": 3.1969401836395264 }, { "id": "identity_128556", "image": "VizWiz/images/VizWiz_train_00014651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image?" }, { "from": "gpt", "value": "The answer is Car" } ], "clip_score": 25.609375, "loss": 3.7850937843322754 }, { "id": "identity_329410", "image": "Geometry3K/images/295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 2\n(B) 3\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.34375, "loss": 3.01078462600708 }, { "id": "identity_248532", "image": "A-OKVQA/images/468548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in the process shown in the image, starting from the arrival of bananas to their final packaging?" }, { "from": "gpt", "value": "The answer is Arrival, inspection, washing, sorting, trimming, final inspection, packaging, labeling, storage, shipping" } ], "clip_score": 22.921875, "loss": 2.290682554244995 }, { "id": "identity_280417", "image": "MapQA/images/map_2938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value in the West?" }, { "from": "gpt", "value": "The answer is 540-658" } ], "clip_score": 23.03125, "loss": 2.5212314128875732 }, { "id": "identity_158168", "image": "AI2D/images/783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the image, what form of nitrogen is absorbed by the cow to produce protein?" }, { "from": "gpt", "value": "The answer is The cow absorbs nitrates" } ], "clip_score": 32.9375, "loss": 2.840850830078125 }, { "id": "identity_263747", "image": "A-OKVQA/images/463342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is the train scheduled to stop at this location?\nChoices:\n(A) Mexico\n(B) Spain\n(C) United States\n(D) England" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.1875, "loss": 4.053213119506836 }, { "id": "identity_60820", "image": "FigureQA/images/94553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Purple line intersect the Medium Blue line?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.453125, "loss": 1.924698829650879 }, { "id": "identity_295362", "image": "A-OKVQA/images/456756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what characteristic is this animal recognized?\nChoices:\n(A) spinning webs\n(B) long neck\n(C) gills\n(D) armored shell" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.342948913574219 }, { "id": "identity_59872", "image": "DocVQA/images/pgxg0224_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many one-on-one sessions are scheduled for Tuesday, 5/16/00?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.921875, "loss": 2.231886148452759 }, { "id": "identity_318044", "image": "IconQA/images/18533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these shapes are there?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.640625, "loss": 1.837664008140564 }, { "id": "identity_124091", "image": "TextVQA/images/010cfb13c3b9be8d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible license plate on the van?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.203125, "loss": 2.937831163406372 }, { "id": "identity_152139", "image": "ScienceQA/images/16697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the magnets are moved towards each other, what interaction will occur?" }, { "from": "gpt", "value": "The answer is Repulsion" } ], "clip_score": 21.625, "loss": 3.466507911682129 }, { "id": "identity_3874", "image": "DVQA/images/bar_train_00117077.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first element from the bottom in each stack of bars?" }, { "from": "gpt", "value": "The answer is patent" } ], "clip_score": 21.734375, "loss": 3.56762957572937 }, { "id": "identity_324970", "image": "DocVQA/images/sljf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mentioned date and time?" }, { "from": "gpt", "value": "The answer is 10/27/93 09:36:07" } ], "clip_score": 22.375, "loss": 2.101231813430786 }, { "id": "identity_146095", "image": "MapQA/images/map_6201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest range of bids?" }, { "from": "gpt", "value": "The answer is Any state in the range 59295-63584" } ], "clip_score": 29.828125, "loss": 2.756098508834839 }, { "id": "identity_71730", "image": "AI2D/images/3896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the plant is directly above the crown?" }, { "from": "gpt", "value": "The answer is leaf sheath" } ], "clip_score": 33.59375, "loss": 2.6377227306365967 }, { "id": "identity_208527", "image": "A-OKVQA/images/293072.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit can be seen on the cake?" }, { "from": "gpt", "value": "The answer is Strawberries" } ], "clip_score": 19.734375, "loss": 2.087620258331299 }, { "id": "identity_83661", "image": "GeoQA+/images/2447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠OBA = 50° in the circle with center O and triangle ABC, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 34.03125, "loss": 1.8296595811843872 }, { "id": "identity_294201", "image": "PMC-VQA/images/PMC8983883_F6_244852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique is utilized for patients with CVID and GLILD? \nOptions:\n(A) MRI imaging\n(B) PET imaging\n(C) CT imaging\n(D) X-ray imaging" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.609375, "loss": 3.587033748626709 }, { "id": "identity_128169", "image": "PMC-VQA/images/PMC8836482_jcm-11-00531-f001_191303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the artery shown in this image?" }, { "from": "gpt", "value": "The answer is ICAR" } ], "clip_score": 27.546875, "loss": 2.793055772781372 }, { "id": "identity_66906", "image": "TQA/images/abc_question_images/nuclear_energy_18118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many neutrons are released after the fission process represented by label \"M\"?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.78125, "loss": 2.0106661319732666 }, { "id": "identity_71312", "image": "AI2D/images/4288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the layer labeled directly below the OCEANIC PLATE?" }, { "from": "gpt", "value": "The answer is Asthenosphere" } ], "clip_score": 30.1875, "loss": 2.2570652961730957 }, { "id": "identity_94946", "image": "IconQA/images/14795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this geometric shape?" }, { "from": "gpt", "value": "The answer is Square pyramid" } ], "clip_score": 32.59375, "loss": 2.9869396686553955 }, { "id": "identity_195947", "image": "PMC-VQA/images/PMC4754989_Fig1_2503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Sagittal plane" } ], "clip_score": 30.90625, "loss": 3.07503604888916 }, { "id": "identity_184297", "image": "PMC-VQA/images/PMC8876035_jcm-11-01002-f001_205890.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of hydrocephalus?" }, { "from": "gpt", "value": "The answer is No signs of hydrocephalus" } ], "clip_score": 32.75, "loss": 2.494032144546509 }, { "id": "identity_262760", "image": "TQA/images/question_images/cycle_rock_6733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After erosion and weathering, what do igneous rocks change into?\nChoices:\n(A) Metamorphic rocks\n(B) Sediments\n(C) Magma\n(D) Volcanic eruptions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.78125, "loss": 4.089954853057861 }, { "id": "identity_29962", "image": "TextVQA/images/fc103492574405e9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the players number?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 15.53125, "loss": 2.0279409885406494 }, { "id": "identity_282851", "image": "TabMWP/images/34402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which stop does the train arrive at 11.45 A.M. according to the provided schedule? Is it Sunset Street, Taylor Road, Lake Street, or Spruce Boulevard?\nChoices:\n(A) Sunset Street\n(B) Taylor Road\n(C) Lake Street\n(D) Spruce Boulevard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.21875, "loss": 3.9486591815948486 }, { "id": "identity_99710", "image": "TabMWP/images/22033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people did more than 30 push-ups?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 21.046875, "loss": 2.2852892875671387 }, { "id": "identity_290834", "image": "IconQA/images/80400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Estimate the number of marbles present.\nChoices:\n(A) approximately 40\n(B) approximately 70" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.125, "loss": 2.8763904571533203 }, { "id": "identity_310000", "image": "IconQA/images/98278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are green?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 25.140625, "loss": 2.403757095336914 }, { "id": "identity_66908", "image": "TQA/images/abc_question_images/nuclear_energy_18118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the initial heavy nucleus before fission occurs?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.71875, "loss": 2.7901523113250732 }, { "id": "identity_83373", "image": "GeoQA+/images/5149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB∥CD and ∠C = 140°, what is the measure of ∠EFG?" }, { "from": "gpt", "value": "The answer is 20°" } ], "clip_score": 32.875, "loss": 1.8533881902694702 }, { "id": "identity_193444", "image": "PMC-VQA/images/PMC7523872_F1_262813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of abnormality in the bone structures shown in the image?" }, { "from": "gpt", "value": "The answer is No signs of abnormality" } ], "clip_score": 27.421875, "loss": 2.8862662315368652 }, { "id": "identity_156887", "image": "TQA/images/abc_question_images/tides_10145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the Earth's crust?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.109375, "loss": 3.005856990814209 }, { "id": "identity_80688", "image": "UniGeo/images/616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the ladder AB is 2.5 meters long and the distance from point B to the wall is 1.4 meters, what is the length of the ladder from point B to point D?" }, { "from": "gpt", "value": "The answer is The length from B to D is 0.5 meters" } ], "clip_score": 27.265625, "loss": 2.074939250946045 }, { "id": "identity_190512", "image": "TextVQA/images/b25f2771b8d91588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many CDs are included in this set?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 12.046875, "loss": 2.7135684490203857 }, { "id": "identity_144647", "image": "DocVQA/images/tlph0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of release for the information contained in the letter?" }, { "from": "gpt", "value": "The answer is December 14, 1972" } ], "clip_score": 27.765625, "loss": 1.7696651220321655 }, { "id": "identity_259235", "image": "PMC-VQA/images/PMC9313141_brainsci-12-00838-f001_349803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the specific anatomical structures surrounding the anterior limb of the internal capsule (ALIC) lesions shown in the CT scan and describe their significance in relation to the lesion" }, { "from": "gpt", "value": "The answer is Caudate nucleus, lentiform nucleus, thalamus, corona radiata" } ], "clip_score": 27.8125, "loss": 1.7869716882705688 }, { "id": "identity_68225", "image": "ScienceQA/images/1476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the object in the image?" }, { "from": "gpt", "value": "The answer is Eating or serving food" } ], "clip_score": 22.296875, "loss": 2.76639461517334 }, { "id": "identity_253588", "image": "VQA-AS/images/8985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are in the image, and what are they doing?" }, { "from": "gpt", "value": "The answer is Two people; one riding a bicycle, one waving on a bench" } ], "clip_score": 30.953125, "loss": 2.481767177581787 }, { "id": "identity_226763", "image": "ScienceQA/images/8395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the tertiary consumer in this food web and explain the energy transfer from the primary producer to this organism" }, { "from": "gpt", "value": "The answer is black crappie" } ], "clip_score": 34.125, "loss": 3.4822118282318115 }, { "id": "identity_150721", "image": "AI2D/images/118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which season is represented by the Earth's position at the top of the image?" }, { "from": "gpt", "value": "The answer is Winter" } ], "clip_score": 26.8125, "loss": 3.8419716358184814 }, { "id": "identity_3252", "image": "TextVQA/images/eb0d2e351d5c2594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on top of the dugout?" }, { "from": "gpt", "value": "The answer is washington nationals" } ], "clip_score": 29.578125, "loss": 2.217531204223633 }, { "id": "identity_25404", "image": "DocVQA/images/lhjh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the Great Western Sugar Company located?" }, { "from": "gpt", "value": "The answer is Deaver, Colorado" } ], "clip_score": 24.765625, "loss": 3.9009289741516113 }, { "id": "identity_127040", "image": "TextVQA/images/52fdbee77a8bb966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the event advertised in the poster?" }, { "from": "gpt", "value": "The answer is III Semana de La Copla" } ], "clip_score": 31.25, "loss": 2.439502477645874 }, { "id": "identity_12247", "image": "AI2D/images/2606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the natural satellite of Earth?\nChoices:\n(A) Moon.\n(B) Sun.\n(C) Earth.\n(D) Star." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.203125, "loss": 4.242664813995361 }, { "id": "identity_92915", "image": "TabMWP/images/6867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people waited for more than 70 minutes but less than 80 minutes?" }, { "from": "gpt", "value": "The answer is 6 people" } ], "clip_score": 27.171875, "loss": 2.6145646572113037 }, { "id": "identity_64102", "image": "TQA/images/question_images/cycle_nitrogen_6714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final product of bacterial fixation in legumes and soil as shown in the image?" }, { "from": "gpt", "value": "The answer is Elemental nitrogen, N2" } ], "clip_score": 25.28125, "loss": 2.9524238109588623 }, { "id": "identity_95596", "image": "CLEVR-Math/images/CLEVR_train_008112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in total?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 22.34375, "loss": 1.8546504974365234 }, { "id": "identity_143088", "image": "MapQA/images/map_9314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Texas have the lowest value in the USA?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.75, "loss": 2.5690741539001465 }, { "id": "identity_56523", "image": "FigureQA/images/70854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis label does Medium Aqua intersect with Dark Turquoise?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 28.890625, "loss": 1.8319464921951294 }, { "id": "identity_163119", "image": "GeoQA+/images/1129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of triangle CEF is 5 and the area of triangle ABC is 20, what is the ratio of the length of segment AD to the length of segment BC?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 33.40625, "loss": 1.8265018463134766 }, { "id": "identity_272762", "image": "IconQA/images/71042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is it shown on the clock while Britney is preparing eggs in the morning?\nChoices:\n(A) 9:30 A.M.\n(B) 9:30 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.234375, "loss": 3.735950469970703 }, { "id": "identity_263073", "image": "PMC-VQA/images/PMC7767912_f1_330639.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of image displayed from the following options:\n(A) CT scan\n(B) PET scan\n(C) MRI scan\n(D) X-ray scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.40625, "loss": 4.208721160888672 }, { "id": "identity_122200", "image": "PMC-VQA/images/PMC6095048_F2_354667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of a mass or tumor in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.015625, "loss": 3.110048770904541 }, { "id": "identity_177291", "image": "TabMWP/images/28749.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of the working and terrier group competitions combined?" }, { "from": "gpt", "value": "The answer is 2 hours and 10 minutes" } ], "clip_score": 31.0, "loss": 1.73013174533844 }, { "id": "identity_151558", "image": "ScienceQA/images/872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are visible on the map?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.03125, "loss": 4.6559977531433105 }, { "id": "identity_160526", "image": "Geometry3K/images/139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the circumference of the entire circle?" }, { "from": "gpt", "value": "The answer is 6 * π inches" } ], "clip_score": 31.734375, "loss": 2.8640153408050537 }, { "id": "identity_182627", "image": "VQA-AS/images/14520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many living creatures are present?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.515625, "loss": 2.208646059036255 }, { "id": "identity_185181", "image": "PMC-VQA/images/PMC4671957_fig5_451194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are used to represent the structures in the middle image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.640625, "loss": 4.150359630584717 }, { "id": "identity_269656", "image": "PlotQA/images/41196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country had the highest expenditure on travel items?" }, { "from": "gpt", "value": "The answer is OECD members" } ], "clip_score": 30.828125, "loss": 3.037961721420288 }, { "id": "identity_108294", "image": "TextVQA/images/3283e50f09dd8f74.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many steps lead up to the front door of the house?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 20.125, "loss": 2.1472790241241455 }, { "id": "identity_181462", "image": "CLEVR-Math/images/CLEVR_train_065790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are matte?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.375, "loss": 2.5590052604675293 }, { "id": "identity_140277", "image": "MapQA/images/map_2728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values indicated by the legend?" }, { "from": "gpt", "value": "The answer is 895k to 900k" } ], "clip_score": 13.625, "loss": 2.365277051925659 }, { "id": "identity_52050", "image": "DVQA/images/bar_train_00095131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the 'critic' category in the 'series' group and the 'title' category in the 'random' group?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.9375, "loss": 2.8036136627197266 }, { "id": "identity_26403", "image": "TQA/images/question_images/human_system_nervous_6179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe is coloured green in the diagram?\nChoices:\n(A) Occipital\n(B) Temporal\n(C) Parietal\n(D) Frontal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.640625, "loss": 3.833430051803589 }, { "id": "identity_105691", "image": "VQA2.0/images/338528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible in the image?" }, { "from": "gpt", "value": "The answer is No clear people visible" } ], "clip_score": 16.8125, "loss": 4.712507724761963 }, { "id": "identity_283316", "image": "DocVQA/images/kydh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is Prof. Nelson Chaves currently located?" }, { "from": "gpt", "value": "The answer is Director" } ], "clip_score": 21.625, "loss": 4.918766975402832 }, { "id": "identity_316879", "image": "AI2D/images/732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the stage between the egg and the chrysalis in a butterfly's life cycle?\nChoices:\n(A) chrysalis\n(B) butterfly\n(C) eggs\n(D) caterpillar" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.828125, "loss": 3.825953722000122 }, { "id": "identity_301243", "image": "DVQA/images/bar_train_00177063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the steelblue color?" }, { "from": "gpt", "value": "The answer is code" } ], "clip_score": 22.984375, "loss": 5.1427001953125 }, { "id": "identity_109539", "image": "A-OKVQA/images/316194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the location where the red bus is parked?" }, { "from": "gpt", "value": "The answer is Fairhaven Village" } ], "clip_score": 31.109375, "loss": 2.3550868034362793 }, { "id": "identity_133869", "image": "VizWiz/images/VizWiz_train_00019475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common feature of the carvings on these stones?" }, { "from": "gpt", "value": "The answer is Carved figures with a similar expression and posture" } ], "clip_score": 34.21875, "loss": 3.774000406265259 }, { "id": "identity_196269", "image": "VQA2.0/images/26409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorbikes can be seen in the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 17.984375, "loss": 2.241178512573242 }, { "id": "identity_330296", "image": "A-OKVQA/images/212501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this large green sculpture made up of?\nChoices:\n(A) paint\n(B) phones\n(C) flash lights\n(D) traffic lights" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.3125, "loss": 4.0013556480407715 }, { "id": "identity_326394", "image": "VQA2.0/images/385145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are these parked?" }, { "from": "gpt", "value": "The answer is bike rack" } ], "clip_score": 30.03125, "loss": 1.8851819038391113 }, { "id": "identity_26171", "image": "A-OKVQA/images/12145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country is this taking place in?\nChoices:\n(A) canada\n(B) england\n(C) usa\n(D) italy" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.328125, "loss": 3.466423749923706 }, { "id": "identity_24628", "image": "ScienceQA/images/18096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can Grayson and Perry trade to each get what they want?\nChoices:\n(A) Grayson can trade his tomatoes for Perry's carrots.\n(B) Perry can trade his almonds for Grayson's tomatoes.\n(C) Perry can trade his broccoli for Grayson's oranges.\n(D) Grayson can trade his tomatoes for Perry's broccoli." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.890625, "loss": 3.035120964050293 }, { "id": "identity_196326", "image": "PMC-VQA/images/PMC7403423_Fig7_226995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of lymph node involvement in the image?" }, { "from": "gpt", "value": "The answer is No evidence of lymph node involvement" } ], "clip_score": 26.765625, "loss": 2.178938150405884 }, { "id": "identity_320931", "image": "IconQA/images/69985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 18.796875, "loss": 1.8376775979995728 }, { "id": "identity_32394", "image": "A-OKVQA/images/24380.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What soft drink is advertised to the left of William Hill?\nChoices:\n(A) mountain dew\n(B) 7-up\n(C) coke zero\n(D) diet pepsi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.75, "loss": 3.8207390308380127 }, { "id": "identity_264032", "image": "FigureQA/images/13433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 3.769395351409912 }, { "id": "identity_69946", "image": "ScienceQA/images/2514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the body of water located between Australia and the highlighted country?" }, { "from": "gpt", "value": "The answer is Coral Sea" } ], "clip_score": 29.796875, "loss": 2.6047914028167725 }, { "id": "identity_54442", "image": "DocVQA/images/ngvb0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the effective date of the form?" }, { "from": "gpt", "value": "The answer is 10/18/63" } ], "clip_score": 17.078125, "loss": 2.179791212081909 }, { "id": "identity_327931", "image": "PMC-VQA/images/PMC8568287_pone.0259245.g001_103616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed on histologic examination of the tumors in the perianal area?\nChoices:\n(A) Normal anorectal histology \n(B) Large perianal tumors \n(C) Invasive anal carcinoma \n(D) Colonic ulcers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.546875, "loss": 3.7251548767089844 }, { "id": "identity_160734", "image": "GeoQA+/images/941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠BEC = 35° and CE bisects ∠ACD, what is the measure of ∠ACD?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 30.875, "loss": 1.5791277885437012 }, { "id": "identity_334135", "image": "A-OKVQA/images/99678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is typically on this type of vehicle?\nChoices:\n(A) boat captain\n(B) pilot\n(C) army sergeant\n(D) paratrooper" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.640625, "loss": 4.425058841705322 }, { "id": "identity_205252", "image": "A-OKVQA/images/314370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a loaf of bread on the counter, and if so, what is its position relative to the stove?" }, { "from": "gpt", "value": "The answer is Yes, to the right of the stove" } ], "clip_score": 23.890625, "loss": 2.4579832553863525 }, { "id": "identity_34285", "image": "GeoQA+/images/3108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在边长为1的小正方形组成的网格中,点A、B、O为格点,则tan∠AOB=()\nChoices:\n(A) \\frac{1}{2}\n(B) \\frac{√{2}}{3}\n(C) \\frac{√{10}}{5}\n(D) \\frac{√{5}}{3}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.65625, "loss": 2.5444564819335938 }, { "id": "identity_141438", "image": "DocVQA/images/qghv0228_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the suite number for the Food and Drug Law Institute?" }, { "from": "gpt", "value": "The answer is Suite 380" } ], "clip_score": 18.625, "loss": 2.545729398727417 }, { "id": "identity_81808", "image": "GeoQA+/images/1065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BCD?" }, { "from": "gpt", "value": "The answer is 40.5 degrees" } ], "clip_score": 30.9375, "loss": 2.2229301929473877 }, { "id": "identity_5258", "image": "Super-CLEVR/images/superCLEVR_new_001227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a matte thing that is the same color as the tiny shiny tandem bike; what shape is it?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 27.4375, "loss": 3.7649974822998047 }, { "id": "identity_183713", "image": "VQA-AS/images/19047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the ground in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.296875, "loss": 3.015500068664551 }, { "id": "identity_267196", "image": "DocVQA/images/kyjg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what location and date did the Seventeenth Annual Meeting of the Association of Teachers of Preventive Medicine take place?" }, { "from": "gpt", "value": "The answer is San Francisco, California, October 30, 1960" } ], "clip_score": 31.90625, "loss": 0.9598422050476074 }, { "id": "identity_129078", "image": "A-OKVQA/images/296544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pizza slices are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 16.34375, "loss": 2.1317355632781982 }, { "id": "identity_323621", "image": "A-OKVQA/images/505967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which city might you find this place?\nChoices:\n(A) new york\n(B) las vegas\n(C) los angeles\n(D) reno" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.140625, "loss": 3.973959445953369 }, { "id": "identity_64203", "image": "ScienceQA/images/15365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how many degrees Celsius did the temperature change for the refrigerator with its door closed?" }, { "from": "gpt", "value": "The answer is 1.6°C" } ], "clip_score": 29.515625, "loss": 1.8509355783462524 }, { "id": "identity_195576", "image": "PMC-VQA/images/PMC6014307_F0005_332052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of blood flow within the central structure?" }, { "from": "gpt", "value": "The answer is No blood flow" } ], "clip_score": 22.5, "loss": 4.125284194946289 }, { "id": "identity_79160", "image": "GeoQA+/images/3432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is parallel to CD and AC equals BC, if angle 2 measures x degrees, what is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is x degrees" } ], "clip_score": 33.78125, "loss": 2.168886661529541 }, { "id": "identity_231053", "image": "TQA/images/abc_question_images/human_system_digestive_16089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the organ labeled D and describe its primary function in the digestive system" }, { "from": "gpt", "value": "The answer is Stomach, breaks down and digests food to extract nutrients" } ], "clip_score": 34.59375, "loss": 2.662917375564575 }, { "id": "identity_313216", "image": "VQA2.0/images/475988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What cigarette is this vehicle advertising?" }, { "from": "gpt", "value": "The answer is marlboro" } ], "clip_score": 21.46875, "loss": 2.6066927909851074 }, { "id": "identity_20485", "image": "PMC-VQA/images/PMC7838061_Fig2_353843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bile ducts were not invaded according to the MRI report? \nChoices:\n(A) right anterior ventral bile duct (yellow arrow) \n(B) left posterior hepatic duct (green arrow) \n(C) left anterior dorsal bile duct (pink arrow) \n(D) right posterior hepatic duct (blue arrow)" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.875, "loss": 3.2830357551574707 }, { "id": "identity_173565", "image": "IconQA/images/84824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each triangle were to be colored with 2 different colors, how many individual color sections would there be?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 33.15625, "loss": 2.3713090419769287 }, { "id": "identity_104484", "image": "VQA-RAD/images/synpic59935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of this MRI scan?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 29.46875, "loss": 3.3374998569488525 }, { "id": "identity_319721", "image": "TabMWP/images/32427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the numbers of grocery stores in each town?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.359375, "loss": 3.0047500133514404 }, { "id": "identity_297583", "image": "VQA2.0/images/214461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name or wording on the sign?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 20.84375, "loss": 2.009211778640747 }, { "id": "identity_303524", "image": "PMC-VQA/images/PMC8908670_Fig2_217519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the lesion located in the preoperative imaging of the 22-year-old patient with osteosarcoma?\nChoices:\n(A) Distal femur\n(B) Proximal femur\n(C) Tibia\n(D) Humerus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.0, "loss": 4.39171028137207 }, { "id": "identity_163419", "image": "UniGeo/images/3343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the sector formed by angle AOC in the given circle with radius 5 units?" }, { "from": "gpt", "value": "The answer is 6.1π square units" } ], "clip_score": 34.875, "loss": 2.5655159950256348 }, { "id": "identity_272608", "image": "FigureQA/images/99306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the curve with the minimum area?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.453125, "loss": 4.1794562339782715 }, { "id": "identity_53213", "image": "DVQA/images/bar_train_00098599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people represented in the graph?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 27.015625, "loss": 2.4109883308410645 }, { "id": "identity_135034", "image": "FigureQA/images/81072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Tomato ever surpass Dark Red in value?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.65625, "loss": 3.198429822921753 }, { "id": "identity_207381", "image": "VQA2.0/images/52849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicles are lined up in the image?" }, { "from": "gpt", "value": "The answer is Food trucks" } ], "clip_score": 31.5625, "loss": 2.3467559814453125 }, { "id": "identity_136558", "image": "DocVQA/images/hgbf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date for the National Academy of Sciences meeting in Boulder, Co?" }, { "from": "gpt", "value": "The answer is June 28-July 9, 1976" } ], "clip_score": 28.703125, "loss": 1.799190878868103 }, { "id": "identity_308747", "image": "TabMWP/images/1075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: This is Sandra's complete financial record for December. How much money was spent on December 29?" }, { "from": "gpt", "value": "The answer is 7.80" } ], "clip_score": 29.328125, "loss": 2.2560789585113525 }, { "id": "identity_207569", "image": "VQA2.0/images/255422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the tents or temporary structures in the image?" }, { "from": "gpt", "value": "The answer is Blue and Yellow" } ], "clip_score": 20.15625, "loss": 2.873668670654297 }, { "id": "identity_98658", "image": "IconQA/images/40350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the height of the letter 'J' before and after the transformation?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.84375, "loss": 2.953216552734375 }, { "id": "identity_242700", "image": "ChartQA/images/multi_col_40548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average wealth in trillion U.S. dollars for Europe from 2010 to 2015, and how does it compare to the average wealth for the United Kingdom over the same period?" }, { "from": "gpt", "value": "The answer is Europe's average is 9.90 trillion U.S. dollars higher" } ], "clip_score": 31.96875, "loss": 2.4568674564361572 }, { "id": "identity_208456", "image": "Super-CLEVR/images/superCLEVR_new_000647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the largest object in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 26.203125, "loss": 3.6263885498046875 }, { "id": "identity_222573", "image": "VizWiz/images/VizWiz_train_00001477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the birdcage made of?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 29.765625, "loss": 4.22265625 }, { "id": "identity_30389", "image": "DocVQA/images/ghlb0054_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the date mentioned in the given page ?" }, { "from": "gpt", "value": "The answer is Thursday, February 2" } ], "clip_score": 21.0, "loss": 1.7951980829238892 }, { "id": "identity_185881", "image": "TextVQA/images/88a82450030b9906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text on the golf ball in the right glass?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 33.59375, "loss": 3.737109661102295 }, { "id": "identity_13264", "image": "TabMWP/images/14361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some friends went bowling and kept track of their scores. If you combine their scores, how many points did Dean and Charlie get?" }, { "from": "gpt", "value": "The answer is 248" } ], "clip_score": 38.03125, "loss": 2.6630146503448486 }, { "id": "identity_165990", "image": "TabMWP/images/34783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Rick also bought 2 kilograms of linen scrap and 4 kilograms of fiber scrap, how much did he spend in total?" }, { "from": "gpt", "value": "The answer is $16.21" } ], "clip_score": 31.9375, "loss": 2.250613212585449 }, { "id": "identity_318902", "image": "UniGeo/images/1699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB∥CD,∠AGE=128°,HM平分∠EHD,则∠MHD的度数是()\nChoices:\n(A) 46°\n(B) 23°\n(C) 26°\n(D) 24" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.359375, "loss": 2.6338000297546387 }, { "id": "identity_235173", "image": "AI2D/images/3761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure that is directly above the diaphragm and describe its function in the respiratory system" }, { "from": "gpt", "value": "The answer is Left lung, responsible for gas exchange in the respiratory system" } ], "clip_score": 32.375, "loss": 2.3139095306396484 }, { "id": "identity_22122", "image": "Super-CLEVR/images/superCLEVR_new_001748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the small blue thing?" }, { "from": "gpt", "value": "The answer is chopper" } ], "clip_score": 17.09375, "loss": 3.4305660724639893 }, { "id": "identity_54145", "image": "DocVQA/images/jmlg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the voucher number mentioned in the document?" }, { "from": "gpt", "value": "The answer is #11" } ], "clip_score": 32.40625, "loss": 3.4261562824249268 }, { "id": "identity_2031", "image": "FigureQA/images/5582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cyan the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.484375, "loss": 4.470885276794434 }, { "id": "identity_298844", "image": "MapQA/images/map_2349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the states with the highest value in the USA?" }, { "from": "gpt", "value": "The answer is Colorado" } ], "clip_score": 28.75, "loss": 2.9297726154327393 }, { "id": "identity_121989", "image": "VQA-AS/images/14291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of window covering is used on the windows?" }, { "from": "gpt", "value": "The answer is Drapes with tiebacks" } ], "clip_score": 19.65625, "loss": 3.1182851791381836 }, { "id": "identity_160869", "image": "UniGeo/images/1276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,从⊙O外一点A引圆的切线AB,切点为B,连接AO并延长交圆于点C,连接BC.已知∠A=26°,求∠OAC的度数为多少?" }, { "from": "gpt", "value": "The answer is 58°" } ], "clip_score": 25.28125, "loss": 2.247704267501831 }, { "id": "identity_55779", "image": "DVQA/images/bar_train_00178555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined value of the \"tray\" bars for both \"action\" and \"lap\"?" }, { "from": "gpt", "value": "The answer is Approximately 10^8 + 10^7" } ], "clip_score": 26.8125, "loss": 2.533377170562744 }, { "id": "identity_136209", "image": "PlotQA/images/95493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of natural gas rent as percent of GDP for East Asia (all income levels) from 2002 to 2012?" }, { "from": "gpt", "value": "The answer is Decreasing trend" } ], "clip_score": 31.109375, "loss": 3.0383248329162598 }, { "id": "identity_63682", "image": "ScienceQA/images/16448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the bat star play in this marine ecosystem?" }, { "from": "gpt", "value": "The answer is decomposer" } ], "clip_score": 29.546875, "loss": 3.2474000453948975 }, { "id": "identity_70234", "image": "TQA/images/abc_question_images/human_system_digestive_13708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"T\" indicate in the digestive system diagram?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 33.0, "loss": 4.270445346832275 }, { "id": "identity_133468", "image": "A-OKVQA/images/355342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear are the performers wearing?" }, { "from": "gpt", "value": "The answer is Traditional Chinese shoes" } ], "clip_score": 26.0625, "loss": 2.8636083602905273 }, { "id": "identity_312601", "image": "UniGeo/images/1811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,AB=AD=DC,若∠BAD=36°,则∠C的大小为多少?\n选项:\n(A) 36°\n(B) 38°\n(C) 40°\n(D) 42°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.25, "loss": 2.6756961345672607 }, { "id": "identity_266338", "image": "ScienceQA/images/10517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) blue\n(B) bouncy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.1875, "loss": 4.382851600646973 }, { "id": "identity_215795", "image": "VQA2.0/images/380474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Afternoon" } ], "clip_score": 11.8828125, "loss": 2.9043941497802734 }, { "id": "identity_181611", "image": "TabMWP/images/23689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was Sandra's available funds at the beginning of December?" }, { "from": "gpt", "value": "The answer is $328.20" } ], "clip_score": 29.078125, "loss": 2.190192937850952 }, { "id": "identity_326476", "image": "MapQA/images/map_10062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value here?" }, { "from": "gpt", "value": "The answer is 48.8%-68.0%" } ], "clip_score": 18.359375, "loss": 2.3862881660461426 }, { "id": "identity_313465", "image": "UniGeo/images/428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在圆中,有三个点A、B、C在圆上,∠OAB=50°,那么∠C的度数是多少?\nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.578125, "loss": 2.6195619106292725 }, { "id": "identity_127512", "image": "A-OKVQA/images/568555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vases that are the same design?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.734375, "loss": 3.1549973487854004 }, { "id": "identity_260773", "image": "ScienceQA/images/8207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony borders both New Jersey and Pennsylvania?" }, { "from": "gpt", "value": "The answer is Delaware" } ], "clip_score": 31.078125, "loss": 2.2432572841644287 }, { "id": "identity_193642", "image": "PMC-VQA/images/PMC5241480_fig1_115273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence is this image likely from?" }, { "from": "gpt", "value": "The answer is T2-weighted MRI sequence" } ], "clip_score": 34.4375, "loss": 1.4372895956039429 }, { "id": "identity_224768", "image": "AI2D/images/1217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the organelle responsible for energy production and describe its structure and function in detail" }, { "from": "gpt", "value": "The answer is Mitochondrion, double membrane, cristae, matrix, ATP production" } ], "clip_score": 31.015625, "loss": 2.181365966796875 }, { "id": "identity_194342", "image": "PMC-VQA/images/PMC5733951_fig1_254274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality was used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.59375, "loss": 2.455606698989868 }, { "id": "identity_200208", "image": "VQA-AS/images/13049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ducks are there in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.21875, "loss": 2.1165096759796143 }, { "id": "identity_293569", "image": "PMC-VQA/images/PMC3427543_F1_151774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the MRI of subject CC, where is the focus shown?\nChoices:\n(A) Left thalamus\n(B) Right thalamus\n(C) Left centrum semiovale\n(D) Right centrum semiovale" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.03125, "loss": 3.7172393798828125 }, { "id": "identity_98816", "image": "IconQA/images/94161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wings do the geese have in total?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.234375, "loss": 2.031489372253418 }, { "id": "identity_26704", "image": "IconQA/images/92803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a circle with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.40625, "loss": 3.9392166137695312 }, { "id": "identity_203749", "image": "PMC-VQA/images/PMC6419362_Fig16_451805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar representing in the image?" }, { "from": "gpt", "value": "The answer is Measurement scale for the image" } ], "clip_score": 23.03125, "loss": 3.0038907527923584 }, { "id": "identity_12162", "image": "IconQA/images/516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use paper clips to measure the line. The line is about (_) paper clips long." }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.953125, "loss": 2.511168956756592 }, { "id": "identity_278138", "image": "UniGeo/images/3290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, BD is the diameter of circle O, chord AC is perpendicular to BD with E as the foot of the perpendicular, and ∠AOB is 60°. What is the measure of ∠BDC? \n(A) 30°\n(B) 45°\n(C) 60°\n(D) 90°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.0625, "loss": 2.3044776916503906 }, { "id": "identity_203222", "image": "VQA-AS/images/13376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paintings are hanging on the wall?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.46875, "loss": 2.4441750049591064 }, { "id": "identity_165538", "image": "GeoQA+/images/2455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ABCD is a rhombus with side length 5, and AE is perpendicular to CB with AE = 6, find the length of CF" }, { "from": "gpt", "value": "The answer is √73.5" } ], "clip_score": 34.28125, "loss": 2.793713331222534 }, { "id": "identity_209235", "image": "PMC-VQA/images/PMC7706203_Fig2_311534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the MRI image showing a cross-sectional view of the brain?" }, { "from": "gpt", "value": "The answer is Yes, it is a cross-sectional (axial) view" } ], "clip_score": 35.8125, "loss": 2.081151008605957 }, { "id": "identity_1626", "image": "DocVQA/images/skng0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the document dated?" }, { "from": "gpt", "value": "The answer is August 11, 1997" } ], "clip_score": 24.890625, "loss": 1.7563825845718384 }, { "id": "identity_17560", "image": "FigureQA/images/5500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Seafoam the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.5, "loss": 3.9122471809387207 }, { "id": "identity_212174", "image": "A-OKVQA/images/236747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the workbench surface?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 25.046875, "loss": 2.8407340049743652 }, { "id": "identity_23008", "image": "GeoQA+/images/430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,在△ABC中,∠ACB=90°,分别以AB、BC、AC为边向外作正方形,若三个正方形的面积分别为225、400、S,则S的值为()\nChoices:\n(A) 25\n(B) 175\n(C) 600\n(D) 625" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.640625, "loss": 2.355339765548706 }, { "id": "identity_18247", "image": "UniGeo/images/2784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点A、B、C在⊙O上,若∠AOB=70°,则∠ACB等于()\nChoices:\n(A) 20°\n(B) 35°\n(C) 45°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.96875, "loss": 2.4458260536193848 }, { "id": "identity_269849", "image": "ScienceQA/images/16717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Florida?\nChoices:\n(A) Tallahassee\n(B) Richmond\n(C) Orlando\n(D) Columbia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.3125, "loss": 4.047605514526367 }, { "id": "identity_132434", "image": "A-OKVQA/images/118544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the headphones on the table?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 17.078125, "loss": 3.2754809856414795 }, { "id": "identity_264008", "image": "ScienceQA/images/19125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.875, "loss": 4.0469207763671875 }, { "id": "identity_151248", "image": "ScienceQA/images/7699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total volume of pink particles in Solution A?" }, { "from": "gpt", "value": "The answer is 7 mL" } ], "clip_score": 36.75, "loss": 2.7255616188049316 }, { "id": "identity_36646", "image": "TabMWP/images/7684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each bowl has 2 strawberries. How many strawberries are in 6 bowls?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 40.21875, "loss": 1.6254873275756836 }, { "id": "identity_165751", "image": "GeoQA+/images/4629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given rhombus ABCD, if the perimeter is 16, what is the length of side AD?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.0, "loss": 2.1227147579193115 }, { "id": "identity_182718", "image": "VQA2.0/images/322892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the aircraft?" }, { "from": "gpt", "value": "The answer is Silver/Metallic" } ], "clip_score": 27.125, "loss": 2.911285877227783 }, { "id": "identity_318498", "image": "Super-CLEVR/images/superCLEVR_new_000452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many suvs are either green rubber objects or blue things?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.75, "loss": 2.3159639835357666 }, { "id": "identity_18637", "image": "ScienceQA/images/7889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) New Zealand\n(B) Palau\n(C) Australia\n(D) Papua New Guinea" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.875, "loss": 3.9127299785614014 }, { "id": "identity_89656", "image": "GeoQA+/images/5082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若F、G分别是DE、BC的中点, 且AG=4, AB=5, 则线段AC的长度为多少?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 28.25, "loss": 2.7109901905059814 }, { "id": "identity_334440", "image": "A-OKVQA/images/51674.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the vessel on the left moving?\nChoices:\n(A) away\n(B) left\n(C) towards\n(D) right" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.15625, "loss": 4.039098739624023 }, { "id": "identity_47181", "image": "DocVQA/images/pklm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there parking available at the event location?" }, { "from": "gpt", "value": "The answer is Limited parking is available" } ], "clip_score": 17.890625, "loss": 2.687288522720337 }, { "id": "identity_230454", "image": "Geometry3K/images/1668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the area of triangle ABC. Round to the nearest tenth" }, { "from": "gpt", "value": "The answer is 45.7" } ], "clip_score": 31.546875, "loss": 2.4234859943389893 }, { "id": "identity_302072", "image": "GeoQA+/images/4300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,AB=13,AC=5,BC=12,经过点C且与边AB相切的动圆与CA、CB分别相交于点P、Q,则线段PQ长度的最小值是多少?\n选项:\n(A) \\frac{12}{5}\n(B) \\frac{60}{13}\n(C) 5\n(D) 无法确定" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.125, "loss": 2.412471294403076 }, { "id": "identity_121346", "image": "A-OKVQA/images/38645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the flower pot?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.875, "loss": 3.2857413291931152 }, { "id": "identity_228852", "image": "DocVQA/images/jfkf0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the five types of documents listed in the overview of the November 2006 submission?" }, { "from": "gpt", "value": "The answer is Summary document, Meta-analysis report, Kaiser first cohort report, New epidemiology report(s), Executive summary of bladder cancer cases" } ], "clip_score": 42.75, "loss": 1.141442894935608 }, { "id": "identity_318749", "image": "DVQA/images/bar_train_00040556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of seat in this?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 21.9375, "loss": 3.074361801147461 }, { "id": "identity_134841", "image": "DocVQA/images/qjgn0226_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the vendor's name mentioned in the document?" }, { "from": "gpt", "value": "The answer is Roger Emerson Jr. MD" } ], "clip_score": 26.171875, "loss": 2.934560537338257 }, { "id": "identity_205812", "image": "A-OKVQA/images/157358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the rollerbladers wearing knee pads?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.3125, "loss": 2.9494521617889404 }, { "id": "identity_7568", "image": "IconQA/images/31454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show three things Nicole did Sunday after lunch. Which did Nicole do last?\nChoices:\n(A) drew a picture\n(B) fed the dog\n(C) did homework" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.375, "loss": 3.120530605316162 }, { "id": "identity_238261", "image": "MapQA/images/map_1701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state in the Northeast has the second highest value, and what is its percentage range?" }, { "from": "gpt", "value": "The answer is New York, 49.0%-50.2%" } ], "clip_score": 28.21875, "loss": 1.8837095499038696 }, { "id": "identity_25816", "image": "PMC-VQA/images/PMC3681609_F1_211116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the datasets shown in the image? \nChoices:\n(A) MRID and PET \n(B) CT and fMRI \n(C) CT and MRID \n(D) CT and EEG " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.015625, "loss": 3.781418561935425 }, { "id": "identity_273483", "image": "Geometry3K/images/1117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle P.\nChoices:\n(A) 30\n(B) 60\n(C) 120\n(D) 150" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.453125, "loss": 3.125930070877075 }, { "id": "identity_29300", "image": "IconQA/images/23761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) unlikely\n(B) probable\n(C) certain\n(D) impossible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.9375, "loss": 4.37458610534668 }, { "id": "identity_281627", "image": "TabMWP/images/2479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of basketball hoops in each park, based on the comparison conducted by the parks department?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 39.625, "loss": 2.6132564544677734 }, { "id": "identity_14267", "image": "DVQA/images/bar_train_00091575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is preferred by the most number of people summed across all the categories?" }, { "from": "gpt", "value": "The answer is fellow" } ], "clip_score": 35.125, "loss": 3.12849760055542 }, { "id": "identity_22594", "image": "A-OKVQA/images/351367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people doing in the stands?\nChoices:\n(A) knitting\n(B) spectating\n(C) protesting\n(D) gaming" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.734375, "loss": 4.644607067108154 }, { "id": "identity_77914", "image": "TQA/images/abc_question_images/human_system_excretory_16117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which structure indicated by the label 'J' is responsible for filtering blood?" }, { "from": "gpt", "value": "The answer is Kidney" } ], "clip_score": 29.203125, "loss": 2.375311851501465 }, { "id": "identity_132145", "image": "Super-CLEVR/images/superCLEVR_new_001924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the green bicycle?" }, { "from": "gpt", "value": "The answer is A blue motorbike" } ], "clip_score": 32.65625, "loss": 3.8756518363952637 }, { "id": "identity_224904", "image": "TextVQA/images/17ce215d6f745b28.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of cleaning products are visible in the image, and what are they?" }, { "from": "gpt", "value": "The answer is three: duster, spray cleaner, and cleaning solution" } ], "clip_score": 16.203125, "loss": 2.2805681228637695 }, { "id": "identity_170414", "image": "TabMWP/images/17931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shipments had a prime number of boxes?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.375, "loss": 2.4119088649749756 }, { "id": "identity_279480", "image": "DVQA/images/bar_train_00117218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For any dataset, which algorithm has the highest accuracy?" }, { "from": "gpt", "value": "The answer is harm" } ], "clip_score": 29.890625, "loss": 3.9785895347595215 }, { "id": "identity_82041", "image": "Geometry3K/images/1166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If P R \\parallel K L, K N = 9, L N = 16, and the area of triangle KLN is 72, find the length of segment KL" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 30.296875, "loss": 2.432892084121704 }, { "id": "identity_309687", "image": "TextVQA/images/9f3f51db8c3de90d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this street?" }, { "from": "gpt", "value": "The answer is 10 av" } ], "clip_score": 18.34375, "loss": 2.712646245956421 }, { "id": "identity_117448", "image": "PMC-VQA/images/PMC6545224_Fig2_488282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible markers or indicators on the image that suggest the orientation of the scan?" }, { "from": "gpt", "value": "The answer is No visible orientation markers" } ], "clip_score": 25.578125, "loss": 4.252373695373535 }, { "id": "identity_70875", "image": "AI2D/images/3261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flagella are present in the cell shown in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 23.484375, "loss": 4.037262916564941 }, { "id": "identity_174338", "image": "TabMWP/images/24263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a passenger arrives at Brookfield at 11:00 A.M., how long do they have to wait for the next train?" }, { "from": "gpt", "value": "The answer is 15 minutes" } ], "clip_score": 29.734375, "loss": 2.1359639167785645 }, { "id": "identity_185384", "image": "TextVQA/images/81cb69ea15958267.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many straws are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.828125, "loss": 4.149618148803711 }, { "id": "identity_298134", "image": "PMC-VQA/images/PMC7509077_f0005_259228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image of the lesion?\nChoices:\n(A) Rim+ type intensity artefact \n(B) Rim- type intensity artefact \n(C) Neither A nor B \n(D) Both A and B." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.078125, "loss": 3.9754226207733154 }, { "id": "identity_284165", "image": "TextVQA/images/6fdb7e49fc8e52a3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Have you visited Staph City before?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.359375, "loss": 2.2979013919830322 }, { "id": "identity_315947", "image": "AI2D/images/1271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is released from cellular respiration in mitochondria within an ecosystem?\nChoices:\n(A) O2\n(B) H2O only\n(C) CO2+H2O\n(D) Organic Molecules" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.328125, "loss": 3.3650519847869873 }, { "id": "identity_153483", "image": "TQA/images/question_images/cycle_water_6000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the layer that prevents water from seeping into the ground?" }, { "from": "gpt", "value": "The answer is Impervious Layer" } ], "clip_score": 25.390625, "loss": 2.076695442199707 }, { "id": "identity_28792", "image": "ScienceQA/images/13874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can Wesley and Kylie trade to each get what they want?\nChoices:\n(A) Kylie can trade her broccoli for Wesley's oranges.\n(B) Wesley can trade his tomatoes for Kylie's broccoli.\n(C) Kylie can trade her almonds for Wesley's tomatoes.\n(D) Wesley can trade his tomatoes for Kylie's carrots." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.75, "loss": 2.965785026550293 }, { "id": "identity_56360", "image": "DocVQA/images/lgvh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount negotiated for 1978 and requested for 1979 combined?" }, { "from": "gpt", "value": "The answer is $411,291" } ], "clip_score": 21.859375, "loss": 2.2773869037628174 }, { "id": "identity_222488", "image": "VQA2.0/images/162319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background behind the catcher?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 23.140625, "loss": 2.6790454387664795 }, { "id": "identity_3950", "image": "DVQA/images/bar_train_00030644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.875, "loss": 3.457681655883789 }, { "id": "identity_103208", "image": "CLEVR-Math/images/CLEVR_train_061094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all red cylinders. How many cylinders are left?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 22.859375, "loss": 2.361098527908325 }, { "id": "identity_65427", "image": "TQA/images/question_images/human_system_reproductory_7030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which structure in the diagram is directly connected to the bladder?" }, { "from": "gpt", "value": "The answer is Urethra" } ], "clip_score": 32.28125, "loss": 1.7303285598754883 }, { "id": "identity_299877", "image": "Super-CLEVR/images/superCLEVR_new_000909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do tiny brown metal objects exist?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 19.890625, "loss": 5.172778129577637 }, { "id": "identity_286705", "image": "GeoQA+/images/938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points E and F are on the line segment BC. If triangle ABF is congruent to triangle DCE, and AB is equal to 3, what is the length of DC?\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.65625, "loss": 2.3167529106140137 }, { "id": "identity_12785", "image": "TQA/images/abc_question_images/nuclear_energy_17102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the turbine in this picture\nChoices:\n(A) D\n(B) C\n(C) V\n(D) S" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.734375, "loss": 3.7178447246551514 }, { "id": "identity_52156", "image": "FigureQA/images/6635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Blue Violet section more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 36.875, "loss": 3.174459934234619 }, { "id": "identity_197682", "image": "PMC-VQA/images/PMC8811295_F1_183380.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brain lobe is predominantly shown in this image?" }, { "from": "gpt", "value": "The answer is Parietal lobe" } ], "clip_score": 32.84375, "loss": 1.9292869567871094 }, { "id": "identity_87107", "image": "GeoQA+/images/5464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle ABC in the right triangle ABC?" }, { "from": "gpt", "value": "The answer is 54" } ], "clip_score": 32.09375, "loss": 3.022552728652954 }, { "id": "identity_210882", "image": "A-OKVQA/images/532216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person playing tennis left-handed or right-handed?" }, { "from": "gpt", "value": "The answer is Right-handed" } ], "clip_score": 29.390625, "loss": 2.255340337753296 }, { "id": "identity_221334", "image": "PMC-VQA/images/PMC6379930_Fig2_437701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the gain setting of the ultrasound machine in this image?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 29.953125, "loss": 2.4164047241210938 }, { "id": "identity_300690", "image": "FigureQA/images/87074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Periwinkle?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0390625, "loss": 4.193215370178223 }, { "id": "identity_633", "image": "UniGeo/images/1478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC中,DE∥BC,\\frac{AD}{AB}=\\frac{1}{2},AE=2cm,则AC的长是()\nChoices:\n(A) 8cm\n(B) 6cm\n(C) 4cm\n(D) 2cm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.125, "loss": 2.794515609741211 }, { "id": "identity_20010", "image": "A-OKVQA/images/517570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the goal of the man working on the boat here?\nChoices:\n(A) waterproofing\n(B) decorative\n(C) spy craft\n(D) weight loss" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.5625, "loss": 4.656443119049072 }, { "id": "identity_258860", "image": "VQA2.0/images/104631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day might it be based on the lighting and shadows in the image?" }, { "from": "gpt", "value": "The answer is midday or early afternoon" } ], "clip_score": 11.734375, "loss": 2.601534843444824 }, { "id": "identity_261945", "image": "DocVQA/images/ptdm0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the page number located at the top right corner of the page?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 20.359375, "loss": 2.9306602478027344 }, { "id": "identity_216390", "image": "TextVQA/images/0d161defbaef6885.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many containers have the word \"Canada\" written on them?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.578125, "loss": 2.4594688415527344 }, { "id": "identity_90448", "image": "UniGeo/images/3487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若∠C=36°,则∠DOB的度数是()" }, { "from": "gpt", "value": "The answer is 144°" } ], "clip_score": 31.5625, "loss": 1.8507916927337646 }, { "id": "identity_202139", "image": "A-OKVQA/images/358523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the building on the cliff?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 21.359375, "loss": 3.4108803272247314 }, { "id": "identity_318557", "image": "PMC-VQA/images/PMC8207792_Fig1_469449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI indicate in the medial thalami?\nChoices:\n(A) Low signal alteration\n(B) High signal alteration\n(C) No signal alteration\n(D) Only left signal alteration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.9375, "loss": 4.325713634490967 }, { "id": "identity_159902", "image": "GeoQA+/images/5832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AD是⊙O的直径,⁀{AB}=⁀{CD},若∠AOB=40°,则∠APB的度数是多少?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 29.609375, "loss": 1.779502272605896 }, { "id": "identity_38186", "image": "VQA2.0/images/77928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors is the wall painted?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 11.2578125, "loss": 3.497185468673706 }, { "id": "identity_4344", "image": "AI2D/images/4212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: M indicates what structure?\nChoices:\n(A) Mid-ocean ridge\n(B) Oceanic plate\n(C) Continental plate\n(D) CO2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.453125, "loss": 4.545302867889404 }, { "id": "identity_104196", "image": "A-OKVQA/images/207491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final destination of the bus according to the display?" }, { "from": "gpt", "value": "The answer is Downtown" } ], "clip_score": 27.4375, "loss": 2.9622695446014404 }, { "id": "identity_55827", "image": "DVQA/images/bar_train_00116034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has a higher number of units sold for the item 'island'?" }, { "from": "gpt", "value": "The answer is injury" } ], "clip_score": 32.65625, "loss": 4.2179155349731445 }, { "id": "identity_13121", "image": "UniGeo/images/189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知E是菱形ABCD的边BC上一点,且∠DAE=∠B=80°,那么∠CDE的度数为()\nChoices:\n(A) 20°\n(B) 25°\n(C) 30°\n(D) 35°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.484375, "loss": 2.714388132095337 }, { "id": "identity_234517", "image": "FigureQA/images/53061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what percentage is Medium Purple less than Maroon?" }, { "from": "gpt", "value": "The answer is 33.33%" } ], "clip_score": 39.5, "loss": 1.148051142692566 }, { "id": "identity_22516", "image": "A-OKVQA/images/73445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the method of locomotion here powered?\nChoices:\n(A) gasoline\n(B) hay\n(C) oil\n(D) coal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.546875, "loss": 4.260470390319824 }, { "id": "identity_217214", "image": "A-OKVQA/images/267042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of laptops are displayed on the table?" }, { "from": "gpt", "value": "The answer is Toshiba" } ], "clip_score": 28.984375, "loss": 1.8713490962982178 }, { "id": "identity_173771", "image": "TabMWP/images/36111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which event has the shortest duration?" }, { "from": "gpt", "value": "The answer is jump rope race" } ], "clip_score": 25.890625, "loss": 2.735428810119629 }, { "id": "identity_92391", "image": "TabMWP/images/11517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest number of passengers on a single train?" }, { "from": "gpt", "value": "The answer is 52" } ], "clip_score": 34.53125, "loss": 2.464188814163208 }, { "id": "identity_218207", "image": "A-OKVQA/images/356384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of water activity is predominantly featured in this image?" }, { "from": "gpt", "value": "The answer is Surfing" } ], "clip_score": 27.796875, "loss": 2.704740047454834 }, { "id": "identity_249910", "image": "AI2D/images/818.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the primary producer and list all the organisms that directly or indirectly depend on it for their energy source" }, { "from": "gpt", "value": "The answer is Snail, Tadpole, Water Beetle, Small fish, Frog, Kingfisher" } ], "clip_score": 32.53125, "loss": 1.8588582277297974 }, { "id": "identity_200360", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f007_203204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any organs in the image?" }, { "from": "gpt", "value": "The answer is Yes, the bladder" } ], "clip_score": 25.328125, "loss": 3.19588041305542 }, { "id": "identity_84021", "image": "Geometry3K/images/569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the third side of the triangle, given that the other two sides are 14 and 15 units long, and the tangent segment from the point of tangency to the vertex opposite the 15-unit side is 9 units long?" }, { "from": "gpt", "value": "The answer is 18 units" } ], "clip_score": 33.875, "loss": 2.23874568939209 }, { "id": "identity_215909", "image": "VQA2.0/images/230712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone area code listed on the ice cream cart?" }, { "from": "gpt", "value": "The answer is 226" } ], "clip_score": 23.078125, "loss": 2.8993868827819824 }, { "id": "identity_122073", "image": "PMC-VQA/images/PMC7156185_F2_151427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle measurement indicated by the yellow protractor in the image?" }, { "from": "gpt", "value": "The answer is 143 degrees" } ], "clip_score": 20.75, "loss": 2.377274513244629 }, { "id": "identity_307463", "image": "DVQA/images/bar_train_00048517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest total value?" }, { "from": "gpt", "value": "The answer is site" } ], "clip_score": 22.96875, "loss": 3.5095317363739014 }, { "id": "identity_127612", "image": "VQA2.0/images/168558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the airplane flying above or below the clouds?" }, { "from": "gpt", "value": "The answer is Above" } ], "clip_score": 25.125, "loss": 2.2620677947998047 }, { "id": "identity_60646", "image": "FigureQA/images/41055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Purple have the largest segment in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 36.0625, "loss": 3.029372453689575 }, { "id": "identity_183099", "image": "A-OKVQA/images/566576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organization's name is mentioned on the sign next to the bus station name?" }, { "from": "gpt", "value": "The answer is County Council" } ], "clip_score": 30.625, "loss": 4.118749141693115 }, { "id": "identity_252560", "image": "TabMWP/images/27724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money will Dominic have left if he buys one ticket for each type of sport listed?" }, { "from": "gpt", "value": "The answer is -48" } ], "clip_score": 33.71875, "loss": 3.697232484817505 }, { "id": "identity_146373", "image": "DocVQA/images/phnm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of document is indicated in the top right corner?" }, { "from": "gpt", "value": "The answer is White Copy" } ], "clip_score": 20.53125, "loss": 2.578117609024048 }, { "id": "identity_193856", "image": "PMC-VQA/images/PMC6006706_Fig1_330441.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lesion in the left hemisphere larger than the one in the right hemisphere?" }, { "from": "gpt", "value": "The answer is Yes, the left lesion is larger" } ], "clip_score": 30.6875, "loss": 2.17209792137146 }, { "id": "identity_19051", "image": "PMC-VQA/images/PMC4656244_Fig6_446794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrow on image b?\nChoices:\n(A) Enlarged renal vein \n(B) Enlarged hepatic vein \n(C) Enlarged RRV \n(D) Enlarged gonadal vein" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.546875, "loss": 4.203727722167969 }, { "id": "identity_258012", "image": "TextVQA/images/2fe2cd44915da4d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of all the time slots listed on the board?" }, { "from": "gpt", "value": "The answer is 200 minutes" } ], "clip_score": 21.71875, "loss": 1.973065733909607 }, { "id": "identity_110739", "image": "A-OKVQA/images/50431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the horses pulling the carriage?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 30.59375, "loss": 3.1991612911224365 }, { "id": "identity_68735", "image": "AI2D/images/3388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs does this butterfly have?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 29.421875, "loss": 3.384188413619995 }, { "id": "identity_5177", "image": "VQA2.0/images/167118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the sign in the background say?" }, { "from": "gpt", "value": "The answer is one way" } ], "clip_score": 18.390625, "loss": 2.7946536540985107 }, { "id": "identity_221237", "image": "TextVQA/images/378e06f3ea05441c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the Disney Princess Trio Analog Watch with Clock?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 23.90625, "loss": 2.719525098800659 }, { "id": "identity_231397", "image": "FigureQA/images/63627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest average value over the entire range?" }, { "from": "gpt", "value": "The answer is Light Seafoam" } ], "clip_score": 23.34375, "loss": 2.464611291885376 }, { "id": "identity_66359", "image": "ScienceQA/images/11460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state located to the west of the highlighted state across the Mississippi River?" }, { "from": "gpt", "value": "The answer is Minnesota" } ], "clip_score": 31.828125, "loss": 3.6388778686523438 }, { "id": "identity_126153", "image": "TextVQA/images/743742bf60eb3bc6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the pink console on the second shelf?" }, { "from": "gpt", "value": "The answer is Dreamcast" } ], "clip_score": 21.75, "loss": 3.3968684673309326 }, { "id": "identity_14400", "image": "A-OKVQA/images/52066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will the man have to grab to board the train?\nChoices:\n(A) jacket\n(B) suitcase\n(C) newspaper\n(D) newspaper" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 4.164914131164551 }, { "id": "identity_142580", "image": "DocVQA/images/khgw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the supplier company?" }, { "from": "gpt", "value": "The answer is Courtaulds Chemicals" } ], "clip_score": 23.296875, "loss": 2.5804805755615234 }, { "id": "identity_253734", "image": "UniGeo/images/1209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB,AC,BD是⊙O的切线,切点分别是P,C,D.若AC=5,BD=3,且AB=x,求证x的值,并计算△APB的面积。" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 30.484375, "loss": 3.61137318611145 }, { "id": "identity_229985", "image": "VQA-RAD/images/synpic42805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the density of masses in the upper lobes to the lower lobes and determine the ratio of the density of masses in the upper lobes to the lower lobes" }, { "from": "gpt", "value": "The answer is The ratio of the density of masses in the upper lobes to the lower lobes is greater than 1" } ], "clip_score": 20.71875, "loss": 1.341810941696167 }, { "id": "identity_64691", "image": "TQA/images/question_images/lewis_dot_diagrams_9138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of valence electrons represented in the diagram for the HCl molecule?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 30.1875, "loss": 2.1786398887634277 }, { "id": "identity_40648", "image": "FigureQA/images/91741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much does the Brown line exceed the Dark Blue line at x-axis label 100?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 33.78125, "loss": 1.7729252576828003 }, { "id": "identity_252316", "image": "Geometry3K/images/1765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If W P is a median and an angle bisector, A P = 3 y + 11, P H = 7 y - 5, m \\angle H W P = x + 12, m \\angle P A W = 3 x - 2, and m \\angle H W A = 4 x - 16, find the value of y" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.375, "loss": 2.4699881076812744 }, { "id": "identity_126707", "image": "VQA2.0/images/172370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the snowboard?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 24.8125, "loss": 3.1762962341308594 }, { "id": "identity_100716", "image": "TabMWP/images/10928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Austen needed a majority of 60% to win, did he achieve this with the seventh grade votes alone?" }, { "from": "gpt", "value": "The answer is No, he did not" } ], "clip_score": 27.84375, "loss": 2.9325692653656006 }, { "id": "identity_175661", "image": "TabMWP/images/24508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a price of $635, what is the difference between quantity demanded and quantity supplied?" }, { "from": "gpt", "value": "The answer is 9,200" } ], "clip_score": 34.34375, "loss": 1.9923549890518188 }, { "id": "identity_250048", "image": "PMC-VQA/images/PMC7252828_fg003_179226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the given images, what are the key differences in the waveform characteristics between the successful ablation and the unsuccessful ablation, and how do these differences indicate the success or failure of the procedure?" }, { "from": "gpt", "value": "The answer is Regularity and stability in waveforms indicate successful ablation" } ], "clip_score": 28.71875, "loss": 3.6320834159851074 }, { "id": "identity_205057", "image": "PMC-VQA/images/PMC7541359_Fig2_267731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is at the center of the outlined area?" }, { "from": "gpt", "value": "The answer is Brain tumor or mass" } ], "clip_score": 28.546875, "loss": 3.2029945850372314 }, { "id": "identity_65619", "image": "TQA/images/question_images/cycle_rock_6729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final stage of the rock cycle after Metamorphic Rock undergoes melting?" }, { "from": "gpt", "value": "The answer is Magma and Lava" } ], "clip_score": 36.8125, "loss": 1.7535417079925537 }, { "id": "identity_251194", "image": "TQA/images/abc_question_images/earth_day_night_12744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label refers to the latitude line that is closest to the North Pole and is also known as the Arctic Circle, and which label refers to the latitude line that is closest to the South Pole and is also known as the Antarctic Circle?" }, { "from": "gpt", "value": "The answer is H and U" } ], "clip_score": 28.328125, "loss": 2.696760892868042 }, { "id": "identity_333835", "image": "PMC-VQA/images/PMC6180882_fig1_380545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture this image?\nChoices:\n(A) MRI\n(B) PET scan\n(C) CT scan\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.6875, "loss": 4.014925956726074 }, { "id": "identity_248897", "image": "DocVQA/images/pnfm0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the designation and department of Dr. Ray E. Brown, and which institution is he affiliated with?" }, { "from": "gpt", "value": "The answer is Associate Professor, Department of Community Medicine, Mt. Sinai School of Medicine" } ], "clip_score": 21.78125, "loss": 1.812076210975647 }, { "id": "identity_160645", "image": "GeoQA+/images/353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If l1 is parallel to l2 and l2 is parallel to l3, and the measure of angle 1 is 60 degrees, what is the measure of the supplementary angle to angle 1?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 29.28125, "loss": 1.389379620552063 }, { "id": "identity_19825", "image": "UniGeo/images/297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在四边形ABCD中,对角线AC⊥BD,垂足为O,点E,F,G,H分别为边AB,BC,CD,AD的中点.若AC=10,BD=6,则四边形EFGH的面积为()\nChoices:\n(A) 15\n(B) 20\n(C) 30\n(D) 60" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.625, "loss": 2.2822463512420654 }, { "id": "identity_66966", "image": "ScienceQA/images/16714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the South pole in Pair 2?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.0625, "loss": 3.3938698768615723 }, { "id": "identity_174771", "image": "TabMWP/images/31378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more photographs were taken on Friday compared to Monday?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.0, "loss": 2.422927141189575 }, { "id": "identity_299135", "image": "VQA2.0/images/503921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current time displayed on the clock?" }, { "from": "gpt", "value": "The answer is 11:10" } ], "clip_score": 21.0625, "loss": 1.6683094501495361 }, { "id": "identity_204436", "image": "TextVQA/images/14f625e7b3f9e25b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of the channel that the person's finger is adjusting?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 21.453125, "loss": 2.4722557067871094 }, { "id": "identity_316279", "image": "TabMWP/images/34710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many homes were sold in Pleasant Town in 2005?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 36.65625, "loss": 1.9765956401824951 }, { "id": "identity_280703", "image": "FigureQA/images/67565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Gold or Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.609375, "loss": 4.052803039550781 }, { "id": "identity_337542", "image": "FigureQA/images/99717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5703125, "loss": 4.579974174499512 }, { "id": "identity_7828", "image": "UniGeo/images/1417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB与⊙O相切于点B,AO的延长线交⊙O于点C,连接BC,若∠A=42°,则∠C等于()\nChoices:\n(A) 30°\n(B) 42°\n(C) 48°\n(D) 24°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.859375, "loss": 2.74157452583313 }, { "id": "identity_70474", "image": "ScienceQA/images/14250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is located directly east of the state marked with a yellow dot?" }, { "from": "gpt", "value": "The answer is Nevada" } ], "clip_score": 34.65625, "loss": 2.5643768310546875 }, { "id": "identity_278900", "image": "TextVQA/images/22c32952b17e0cbe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand of the wines that are being showcased?" }, { "from": "gpt", "value": "The answer is cadus" } ], "clip_score": 28.375, "loss": 4.8097944259643555 }, { "id": "identity_220641", "image": "VQA-AS/images/1652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.34375, "loss": 4.470035552978516 }, { "id": "identity_27260", "image": "TextVQA/images/7d92ffdafdbd3e65.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who makes that monitor?" }, { "from": "gpt", "value": "The answer is dell" } ], "clip_score": 28.234375, "loss": 3.3186252117156982 }, { "id": "identity_76036", "image": "AI2D/images/4853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the underground reservoir that holds magma?" }, { "from": "gpt", "value": "The answer is Magma Chamber" } ], "clip_score": 29.875, "loss": 2.31168532371521 }, { "id": "identity_201480", "image": "A-OKVQA/images/406646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the plant pots in the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 25.71875, "loss": 3.6516404151916504 }, { "id": "identity_124585", "image": "VQA2.0/images/197584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the person in the image?" }, { "from": "gpt", "value": "The answer is Pitcher" } ], "clip_score": 25.171875, "loss": 2.559274435043335 }, { "id": "identity_83736", "image": "UniGeo/images/1841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,∠C=36°,直线l是△ABC的对称轴,则∠BAD的度数是多少?" }, { "from": "gpt", "value": "The answer is 108°" } ], "clip_score": 30.6875, "loss": 1.6914877891540527 }, { "id": "identity_211658", "image": "PMC-VQA/images/PMC5559574_Fig1_203935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the hip joint in the image?" }, { "from": "gpt", "value": "The answer is Lateral view of the right hip joint" } ], "clip_score": 32.59375, "loss": 2.8442487716674805 }, { "id": "identity_167202", "image": "TabMWP/images/30759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a senior ticket cost than a child ticket at the San Francisco Zoo?" }, { "from": "gpt", "value": "The answer is $3" } ], "clip_score": 37.71875, "loss": 1.8839834928512573 }, { "id": "identity_198922", "image": "PMC-VQA/images/PMC4009138_Fig3_285691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical imaging technique used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 27.8125, "loss": 2.944749116897583 }, { "id": "identity_140605", "image": "DocVQA/images/znyf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the memorandum?" }, { "from": "gpt", "value": "The answer is 24 Sept 74" } ], "clip_score": 22.015625, "loss": 2.9001805782318115 }, { "id": "identity_284154", "image": "FigureQA/images/70111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the least valuable option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.765625, "loss": 3.970742702484131 }, { "id": "identity_318501", "image": "IconQA/images/14319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings are there?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 21.15625, "loss": 2.3961408138275146 }, { "id": "identity_127062", "image": "TextVQA/images/6e6b232838ea3209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the monitor appear to have built-in speakers?" }, { "from": "gpt", "value": "The answer is No visible speakers" } ], "clip_score": 23.28125, "loss": 3.6655874252319336 }, { "id": "identity_273663", "image": "TabMWP/images/20923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each pond contains 2 frogs, how many frogs are there in a total of 4 ponds?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 35.84375, "loss": 1.8035516738891602 }, { "id": "identity_87115", "image": "Geometry3K/images/2081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of z in the triangle?" }, { "from": "gpt", "value": "The answer is z = √52 or approximately 7.21" } ], "clip_score": 35.84375, "loss": 2.242560863494873 }, { "id": "identity_22999", "image": "VQA2.0/images/516114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the food healthy?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.609375, "loss": 3.3804192543029785 }, { "id": "identity_233273", "image": "ScienceQA/images/17623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted ocean and list three countries that have coastlines along this ocean" }, { "from": "gpt", "value": "The answer is Arctic Ocean; Russia, Canada, Norway" } ], "clip_score": 30.59375, "loss": 2.792397975921631 }, { "id": "identity_246619", "image": "PMC-VQA/images/PMC5483845_Fig3_182553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are visualized in the ultrasound image when the probe is positioned for imaging the lumbar plexus, and how do they relate to the positioning of the probe?" }, { "from": "gpt", "value": "The answer is Greater sciatic foramen, sciatic nerve, GM, and ischium" } ], "clip_score": 35.0, "loss": 2.236041784286499 }, { "id": "identity_71091", "image": "AI2D/images/3993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the outermost layer of the sun as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Chromosphere" } ], "clip_score": 31.046875, "loss": 1.7413363456726074 }, { "id": "identity_213510", "image": "VQA2.0/images/323203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sports team is represented on the pennant being held?" }, { "from": "gpt", "value": "The answer is LA Kings" } ], "clip_score": 25.953125, "loss": 2.9551479816436768 }, { "id": "identity_136399", "image": "FigureQA/images/83350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of the Seafoam line at x-axis label 50?" }, { "from": "gpt", "value": "The answer is Approximately 46" } ], "clip_score": 27.375, "loss": 2.6852307319641113 }, { "id": "identity_200487", "image": "PMC-VQA/images/PMC5033377_pone.0162969.g002_76274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a difference in cell density between the top and bottom parts of figure D'?" }, { "from": "gpt", "value": "The answer is Yes, lower cell density in the bottom part" } ], "clip_score": 17.125, "loss": 2.611807107925415 }, { "id": "identity_164011", "image": "GeoQA+/images/3114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If OC is perpendicular to AB at D and OB is the radius of the circle with a length of 10, what is the measure of angle ODB?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.59375, "loss": 1.513331651687622 }, { "id": "identity_324958", "image": "DVQA/images/bar_train_00060267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item is the most preferred in any group?" }, { "from": "gpt", "value": "The answer is owner" } ], "clip_score": 25.875, "loss": 2.7938079833984375 }, { "id": "identity_314926", "image": "TabMWP/images/8515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A researcher working for a cereal company surveyed people about their cereal consumption habits. How many individuals consumed cereal at least 3 times?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 35.25, "loss": 2.347896099090576 }, { "id": "identity_107860", "image": "TextVQA/images/ca0fcef409fb0e98.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the product on the bottle?" }, { "from": "gpt", "value": "The answer is F-ing Perfect Lawn" } ], "clip_score": 26.484375, "loss": 3.207989454269409 }, { "id": "identity_62120", "image": "FigureQA/images/55416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph have a legend explaining the colors?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.609375, "loss": 3.157691478729248 }, { "id": "identity_272374", "image": "GEOS/images/023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the value of x in the circle with the center indicated?\nChoices:\n(A) 100\n(B) 80\n(C) 50\n(D) 40" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.71875, "loss": 3.2470765113830566 }, { "id": "identity_261342", "image": "VQA2.0/images/69498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of people present in the picture?" }, { "from": "gpt", "value": "The answer is many" } ], "clip_score": 16.40625, "loss": 3.9020113945007324 }, { "id": "identity_117116", "image": "VizWiz/images/VizWiz_train_00012291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the leash?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.96875, "loss": 3.282548189163208 }, { "id": "identity_159244", "image": "GeoQA+/images/1351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a new factory E is built on road l2 such that DE is perpendicular to l2 and DE=3 km, what is the distance from E to point O?" }, { "from": "gpt", "value": "The answer is 5.83 km" } ], "clip_score": 28.28125, "loss": 1.9718517065048218 }, { "id": "identity_210332", "image": "VQA2.0/images/187559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the giraffes' spots?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 28.515625, "loss": 3.49316668510437 }, { "id": "identity_329405", "image": "PMC-VQA/images/PMC7051105_FIG2_126949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrow in this image?\nChoices:\n(A) Torn muscle\n(B) Eighth arch fracture\n(C) Dislocated joint\n(D) Enlarged intercostal space" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.328125, "loss": 4.355084419250488 }, { "id": "identity_2530", "image": "A-OKVQA/images/11576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of weather is this?\nChoices:\n(A) tsunami\n(B) rainy\n(C) sunny\n(D) clear skies" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9609375, "loss": 4.190784931182861 }, { "id": "identity_288247", "image": "Geometry3K/images/928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of y.\nOptions:\n(A) 7\n(B) 8\n(C) 12\n(D) 17" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.21875, "loss": 2.5125670433044434 }, { "id": "identity_135425", "image": "DocVQA/images/ftxv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predicted accuracy of the equation for yield reductions when kochia density is less than 20 plants per 100 feet of row?" }, { "from": "gpt", "value": "The answer is Within 5%" } ], "clip_score": 17.296875, "loss": 3.123460054397583 }, { "id": "identity_192509", "image": "A-OKVQA/images/376246.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 16.4375, "loss": 2.817742109298706 }, { "id": "identity_328614", "image": "PMC-VQA/images/PMC3784059_Fig3_233800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the images?\nChoices:\n(A) MRI scan\n(B) CT scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 5.241555213928223 }, { "id": "identity_307042", "image": "A-OKVQA/images/349822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sport involving multiple people is being played?\nChoices:\n(A) cricket\n(B) frisbee\n(C) tennis\n(D) badminton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.78125, "loss": 4.191656112670898 }, { "id": "identity_271229", "image": "TQA/images/abc_question_images/human_system_respiratory_11326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that represents the pharynx:\n(A) M\n(B) F\n(C) K\n(D) V" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.875, "loss": 3.9615204334259033 }, { "id": "identity_297044", "image": "AI2D/images/4221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What geological process is represented by point M in the rock cycle diagram?\nChoices:\n(A) Crystallization\n(B) Lithification\n(C) Metamorphism\n(D) Melting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.515625, "loss": 3.950136184692383 }, { "id": "identity_129386", "image": "VQA2.0/images/409949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more people in the water or on the surfboards?" }, { "from": "gpt", "value": "The answer is In the water" } ], "clip_score": 30.6875, "loss": 2.3297245502471924 }, { "id": "identity_198158", "image": "A-OKVQA/images/186175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the truck?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.4375, "loss": 2.1805503368377686 }, { "id": "identity_155181", "image": "ScienceQA/images/15490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of both objects in the image?" }, { "from": "gpt", "value": "The answer is Adornment/Decoration" } ], "clip_score": 25.375, "loss": 3.208693265914917 }, { "id": "identity_180830", "image": "TabMWP/images/23982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people ate an even number of bananas last week?" }, { "from": "gpt", "value": "The answer is 47 people" } ], "clip_score": 35.0625, "loss": 2.9126150608062744 }, { "id": "identity_9555", "image": "VizWiz/images/VizWiz_train_00011344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the gas on?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.78125, "loss": 3.4606125354766846 }, { "id": "identity_78113", "image": "TQA/images/abc_question_images/skin_cross_section_17580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"T\"?" }, { "from": "gpt", "value": "The answer is Sweat production" } ], "clip_score": 19.578125, "loss": 3.53722882270813 }, { "id": "identity_49707", "image": "ChartQA/images/77764176007706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average DALYs rate for Czechia over the 10-year period?" }, { "from": "gpt", "value": "The answer is 400" } ], "clip_score": 24.921875, "loss": 2.3555779457092285 }, { "id": "identity_229082", "image": "A-OKVQA/images/302343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats in the image have a white stripe wrapped around them, and what is the total number of boats in the image?" }, { "from": "gpt", "value": "The answer is Five boats have a white stripe, and there are five boats in total" } ], "clip_score": 23.546875, "loss": 1.945894718170166 }, { "id": "identity_85667", "image": "UniGeo/images/732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the x-coordinate of point A in the given image?" }, { "from": "gpt", "value": "The answer is -4" } ], "clip_score": 32.34375, "loss": 2.5636236667633057 }, { "id": "identity_303223", "image": "TextVQA/images/0f8c494d3ab11b03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the creator of this classic piece?" }, { "from": "gpt", "value": "The answer is eric milnes" } ], "clip_score": 9.7578125, "loss": 3.133040428161621 }, { "id": "identity_277040", "image": "DVQA/images/bar_train_00075232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm in the dataset has a smaller cost: the algorithm school or the algorithm road in the dataset canvas, based on their accuracy?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 30.890625, "loss": 4.215084552764893 }, { "id": "identity_267949", "image": "ScienceQA/images/17376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind removing broken eggshells from the nest to enhance the reproductive success of a black-headed gull? Select the claim below that best answers this question and is supported by the passage.\nRemoving broken eggshells from the nest increases the chances that (). \nChoices:\n(A) the gull's chicks will get food\n(B) the gull's offspring will survive\n(C) the gull will be away from its offspring at a given time" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.09375, "loss": 3.0915756225585938 }, { "id": "identity_168584", "image": "TabMWP/images/5127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more loaves were left over on Monday compared to Thursday?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 37.4375, "loss": 2.1798014640808105 }, { "id": "identity_332850", "image": "PMC-VQA/images/PMC3320613_pgen-1002631-g004_132962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy was likely used to create this image?\nChoices:\n(A) Light microscopy\n(B) Electron microscopy\n(C) X-ray microscopy\n(D) Confocal microscopy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.984375, "loss": 4.264127731323242 }, { "id": "identity_314288", "image": "DocVQA/images/txdw0217_35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this number?" }, { "from": "gpt", "value": "The answer is DWRITE 077660" } ], "clip_score": 17.21875, "loss": 3.0309743881225586 }, { "id": "identity_313094", "image": "UniGeo/images/189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知E是菱形ABCD的边BC上一点,且∠DAE=∠B=80°,那么∠CDE的度数为多少?\nChoices:\n(A) 20°\n(B) 25°\n(C) 30°\n(D) 35°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.484375, "loss": 2.614733934402466 }, { "id": "identity_285506", "image": "PMC-VQA/images/PMC4495713_Fig4_404283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the first observation, which part of the section exhibits more intense staining?\nChoices:\n(A) Notochordal sheath\n(B) Lumen\n(C) Outer layer\n(D) Connective tissue" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 3.9323794841766357 }, { "id": "identity_58398", "image": "DocVQA/images/nmwn0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the publication Amber Howard is associated with?" }, { "from": "gpt", "value": "The answer is Bone & Joint Surgery" } ], "clip_score": 20.953125, "loss": 2.122703790664673 }, { "id": "identity_308072", "image": "A-OKVQA/images/164569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which entity is most likely associated with the tallest structure shown?\nChoices:\n(A) willis\n(B) chrysler\n(C) sears\n(D) hsbc" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.953125, "loss": 3.923785448074341 }, { "id": "identity_126668", "image": "TextVQA/images/ef5d49ad66e286f0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the typewriter?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 29.9375, "loss": 3.2468066215515137 }, { "id": "identity_218570", "image": "PMC-VQA/images/PMC6033919_Fig1_338731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the foraminifer specimen shown in a single or multiple chambered form?" }, { "from": "gpt", "value": "The answer is Multiple chambered form" } ], "clip_score": 23.5, "loss": 2.2354540824890137 }, { "id": "identity_30269", "image": "AI2D/images/821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, what happen if all butterfly dies\nChoices:\n(A) fish decreases\n(B) plants decreases\n(C) frog increases\n(D) frog decreases" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 40.96875, "loss": 3.1904239654541016 }, { "id": "identity_229456", "image": "A-OKVQA/images/208408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visual indicators in the image that suggest the weather condition, and how do they support the phrase \"downpour\"?" }, { "from": "gpt", "value": "The answer is Umbrella, wet pavement, reflections, overcast sky" } ], "clip_score": 31.453125, "loss": 2.9419021606445312 }, { "id": "identity_245332", "image": "PMC-VQA/images/PMC8876035_jcm-11-01002-f001_205890.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of increased signals observed in the midbrain on axial FLAIR images, and what potential conditions could this indicate?" }, { "from": "gpt", "value": "The answer is Multiple sclerosis, stroke, tumors, infections, or neurodegenerative diseases" } ], "clip_score": 35.71875, "loss": 1.9595942497253418 }, { "id": "identity_183603", "image": "PMC-VQA/images/PMC7662352_molecules-25-05072-f008_297053.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might the red spots in the image indicate?" }, { "from": "gpt", "value": "The answer is Areas of high metabolic activity or abnormalities" } ], "clip_score": 30.4375, "loss": 2.9523987770080566 }, { "id": "identity_224828", "image": "TextVQA/images/a1a0e5c06f9c7bda.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of unique characters used in the first line, and what are they?" }, { "from": "gpt", "value": "The answer is 7 unique characters: W, e, h, a, v, p, r" } ], "clip_score": 18.78125, "loss": 1.439662218093872 }, { "id": "identity_123289", "image": "PMC-VQA/images/PMC7007649_Fig1_113538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At which vertebral level is the lowest visible screw placed?" }, { "from": "gpt", "value": "The answer is L4 level" } ], "clip_score": 28.34375, "loss": 2.8670766353607178 }, { "id": "identity_39090", "image": "FigureQA/images/27638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 31.34375, "loss": 3.723736524581909 }, { "id": "identity_98782", "image": "IconQA/images/22188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are both purple and circles?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.65625, "loss": 2.3246383666992188 }, { "id": "identity_65948", "image": "ScienceQA/images/9569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items on Troy's list are vegetables?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.65625, "loss": 2.4703378677368164 }, { "id": "identity_288064", "image": "PMC-VQA/images/PMC7225200_Fig4_170316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the vertical height depicted in the views provided?\nChoices:\n(A) 8 mm\n(B) 12 mm\n(C) 14 mm\n(D) 18 mm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.96875, "loss": 3.244612216949463 }, { "id": "identity_61926", "image": "DocVQA/images/jzhb0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the zip code of the institution the letter is addressed to?" }, { "from": "gpt", "value": "The answer is 27515" } ], "clip_score": 20.09375, "loss": 2.03428053855896 }, { "id": "identity_304984", "image": "FigureQA/images/40016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the highest median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.953125, "loss": 4.0108513832092285 }, { "id": "identity_145964", "image": "DocVQA/images/gljd0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What city is the department located in?" }, { "from": "gpt", "value": "The answer is WASHINGTON, D.C" } ], "clip_score": 19.828125, "loss": 1.8075040578842163 }, { "id": "identity_56111", "image": "MapQA/images/map_2250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of Texas?" }, { "from": "gpt", "value": "The answer is 40-50" } ], "clip_score": 21.890625, "loss": 2.015972852706909 }, { "id": "identity_50947", "image": "FigureQA/images/53769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the largest portion of the pie chart?" }, { "from": "gpt", "value": "The answer is Royal Blue" } ], "clip_score": 36.1875, "loss": 3.254394769668579 }, { "id": "identity_111781", "image": "VQA-AS/images/14736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the curtain in the window?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 27.140625, "loss": 3.6849257946014404 }, { "id": "identity_286322", "image": "IconQA/images/17300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the boundary of the shape?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 31.625, "loss": 2.3534514904022217 }, { "id": "identity_287950", "image": "VQA2.0/images/114473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of horses that are currently pulling the cart?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.328125, "loss": 2.026686906814575 }, { "id": "identity_18172", "image": "UniGeo/images/288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在平行四边形ABCD中,AE⊥BC于E,AF⊥CD于F,∠ABC=75°,则∠EAF的度数为()\nChoices:\n(A) 60°\n(B) 65°\n(C) 70°\n(D) 75°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.34375, "loss": 2.338463544845581 }, { "id": "identity_180395", "image": "TabMWP/images/27346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Colton has $0.10, which items can he afford to buy without exceeding his budget?" }, { "from": "gpt", "value": "The answer is flat-head bolt, four-inch nail, small metal hook" } ], "clip_score": 34.125, "loss": 1.5173425674438477 }, { "id": "identity_71846", "image": "TQA/images/question_images/radioactive_decay_8177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What element does the daughter nucleus represent after the alpha decay of Uranium-238?" }, { "from": "gpt", "value": "The answer is Thorium" } ], "clip_score": 35.875, "loss": 2.3054752349853516 }, { "id": "identity_283895", "image": "PMC-VQA/images/PMC7795385_jcm-10-00030-f001_338616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, where is the cyst located?\nChoices:\n(A) Left Maxilla\n(B) Right Maxilla\n(C) Left Mandible\n(D) Right Mandible" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.984375, "loss": 3.9243388175964355 }, { "id": "identity_100409", "image": "TabMWP/images/10632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the students in applied majors are majoring in Civil engineering?" }, { "from": "gpt", "value": "The answer is 28.16%" } ], "clip_score": 33.40625, "loss": 1.7890666723251343 }, { "id": "identity_3993", "image": "PMC-VQA/images/PMC7594485_fig1-2055116920957196_284900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the green arrow point to in the image? \nChoices:\n(A) Portal vein \n(B) Normal spleen tissue \n(C) Mixed echogenic mass \n(D) Splenic capsule " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.96875, "loss": 4.631892204284668 }, { "id": "identity_237960", "image": "UniGeo/images/2134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知梯形ABCD,AD∥BC,AD=DC=4,BC=8,点N在BC上,CN=2,E是AB中点,在AC上找一点M使EM+MN的值最小,此时其最小值一定等于多少?请证明此最小值的计算过程,并求出M点的具体坐标。" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 27.25, "loss": 2.9679946899414062 }, { "id": "identity_203399", "image": "TextVQA/images/0ae44959cb83e7b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the jersey of the player second from the left?" }, { "from": "gpt", "value": "The answer is 27" } ], "clip_score": 19.703125, "loss": 2.2304799556732178 }, { "id": "identity_72954", "image": "AI2D/images/2297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are there in the life cycle depicted in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.453125, "loss": 2.339702844619751 }, { "id": "identity_114464", "image": "Super-CLEVR/images/superCLEVR_new_001441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest bicycle in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.859375, "loss": 3.606287717819214 }, { "id": "identity_213768", "image": "TextVQA/images/615012a8dd5382da.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the awnings on the second floor of the building?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.203125, "loss": 3.95329213142395 }, { "id": "identity_124200", "image": "A-OKVQA/images/540554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 12.5390625, "loss": 3.140516757965088 }, { "id": "identity_111521", "image": "PMC-VQA/images/PMC4937406_f3_49165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar measurement unit in the image?" }, { "from": "gpt", "value": "The answer is micrometers (µm)" } ], "clip_score": 15.359375, "loss": 1.7979145050048828 }, { "id": "identity_258881", "image": "PMC-VQA/images/PMC4237798_F1_337847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key anatomical structures visible in this MRI image, and what is their significance in diagnosing neurological conditions?" }, { "from": "gpt", "value": "The answer is Ventricles, brain parenchyma, sulci, and gyri; diagnostic significance" } ], "clip_score": 33.625, "loss": 2.846682548522949 }, { "id": "identity_9425", "image": "TextVQA/images/8cd6412c8f6ad3fa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what branch does this carrier belong to?" }, { "from": "gpt", "value": "The answer is air national guard" } ], "clip_score": 29.875, "loss": 2.606632947921753 }, { "id": "identity_113497", "image": "A-OKVQA/images/203702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of location is depicted in the image?" }, { "from": "gpt", "value": "The answer is Drive-through safari park" } ], "clip_score": 29.90625, "loss": 3.2735495567321777 }, { "id": "identity_40304", "image": "FigureQA/images/90930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Cyan at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.578125, "loss": 2.510484457015991 }, { "id": "identity_293482", "image": "TQA/images/question_images/cycle_water_4927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Before evaporation, where does ground runoff water gather?\nChoices:\n(A) River\n(B) Snow\n(C) Sea\n(D) Rain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.53125, "loss": 3.958263874053955 }, { "id": "identity_12758", "image": "AI2D/images/4297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given image which layer lies above the upper mantle?\nChoices:\n(A) oceanic crust\n(B) continental crust\n(C) plastic asthenosphere\n(D) lithosphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.09375, "loss": 4.598475933074951 }, { "id": "identity_286850", "image": "DocVQA/images/mrlm0227_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the Congress?" }, { "from": "gpt", "value": "The answer is RIOCENTRO" } ], "clip_score": 19.796875, "loss": 2.8085203170776367 }, { "id": "identity_325112", "image": "TabMWP/images/12435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many gardens have a number of plants between 51 and 86?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 35.4375, "loss": 2.5353806018829346 }, { "id": "identity_129679", "image": "TextVQA/images/0361af3b7b625275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.390625, "loss": 4.047550201416016 }, { "id": "identity_325660", "image": "TQA/images/abc_question_images/optics_lense_types_19163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the item.\nChoices:\n(A) D\n(B) L\n(C) K\n(D) C" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.140625, "loss": 2.996232032775879 }, { "id": "identity_173544", "image": "TabMWP/images/26194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years did it take to reduce the record time from 28 minutes to 21 minutes?" }, { "from": "gpt", "value": "The answer is 5 years" } ], "clip_score": 28.3125, "loss": 2.234586715698242 }, { "id": "identity_280409", "image": "AI2D/images/3719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ in the body produces a bodily juice known as bile?\nChoices:\n(A) E\n(B) O\n(C) G\n(D) J" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.453125, "loss": 3.743201494216919 }, { "id": "identity_331449", "image": "PMC-VQA/images/PMC3948908_Fig6_273122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to obtain the image shown in (d)?\nChoices:\n(A) Ultrasound\n(B) CT scan\n(C) X-ray\n(D) MR scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.75, "loss": 3.941135883331299 }, { "id": "identity_47654", "image": "FigureQA/images/44534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the Medium Seafoam and Gold bars?" }, { "from": "gpt", "value": "The answer is Approximately 40" } ], "clip_score": 26.5, "loss": 2.8692829608917236 }, { "id": "identity_47613", "image": "MapQA/images/map_7121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of Texas?" }, { "from": "gpt", "value": "The answer is 25-30" } ], "clip_score": 26.375, "loss": 2.122241258621216 }, { "id": "identity_99603", "image": "IconQA/images/20131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the ruler is covered by the line?" }, { "from": "gpt", "value": "The answer is 3/5" } ], "clip_score": 31.546875, "loss": 1.6495349407196045 }, { "id": "identity_28359", "image": "PMC-VQA/images/PMC5583184_Fig5_211156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the displayed structure on the reference images? \nChoices:\n(A) Corpus Callosum \n(B) Corticospinal tract \n(C) Amygdala \n(D) Thalamus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.765625, "loss": 4.457826614379883 }, { "id": "identity_239052", "image": "PMC-VQA/images/PMC4269278_fig1_345313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What characteristics of the masses observed in both breasts suggest a diagnosis of innumerable lobulated enhancing masses rather than normal breast tissue, abscesses, or cysts?" }, { "from": "gpt", "value": "The answer is Lobulated and enhancing masses suggest innumerable lobulated enhancing masses" } ], "clip_score": 26.28125, "loss": 2.0898258686065674 }, { "id": "identity_97397", "image": "IconQA/images/36684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line were extended to the 8 cm mark, how much longer would it be in centimeters?" }, { "from": "gpt", "value": "The answer is 0.8 centimeters" } ], "clip_score": 31.328125, "loss": 1.6718833446502686 }, { "id": "identity_19183", "image": "ChartQA/images/two_col_24382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people that are at least somewhat satisfied?" }, { "from": "gpt", "value": "The answer is 84" } ], "clip_score": 27.71875, "loss": 2.8938488960266113 }, { "id": "identity_5752", "image": "VQA-AS/images/7845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is next to the fireplace on the floor?" }, { "from": "gpt", "value": "The answer is dog bed" } ], "clip_score": 33.40625, "loss": 3.913342237472534 }, { "id": "identity_333499", "image": "DocVQA/images/rncd0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the publication identifier?" }, { "from": "gpt", "value": "The answer is Publication 386" } ], "clip_score": 16.78125, "loss": 3.1030993461608887 }, { "id": "identity_11396", "image": "AI2D/images/293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, if all the heliotropus were to die, what organism would be directly affected?\nChoices:\n(A) Flapeelia\n(B) Planton\n(C) Squat jellies\n(D) Mud foot" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.84375, "loss": 3.8754658699035645 }, { "id": "identity_158553", "image": "TQA/images/question_images/periodic_table_8160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element has an atomic mass of approximately 55.85?" }, { "from": "gpt", "value": "The answer is Iron (Fe)" } ], "clip_score": 24.59375, "loss": 1.7916834354400635 }, { "id": "identity_52223", "image": "FigureQA/images/96150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value for Royal Blue at x-axis label 50?" }, { "from": "gpt", "value": "The answer is 92" } ], "clip_score": 27.9375, "loss": 3.0838797092437744 }, { "id": "identity_181348", "image": "CLEVR-Math/images/CLEVR_train_019515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more spheres or cylinders in the image?" }, { "from": "gpt", "value": "The answer is More spheres" } ], "clip_score": 29.765625, "loss": 2.6893246173858643 }, { "id": "identity_163891", "image": "GeoQA+/images/3573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠BAD?" }, { "from": "gpt", "value": "The answer is 130°" } ], "clip_score": 33.59375, "loss": 2.338033437728882 }, { "id": "identity_66820", "image": "ScienceQA/images/16033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the latitude range of the outlined area shown?" }, { "from": "gpt", "value": "The answer is 20°N to 0°" } ], "clip_score": 22.40625, "loss": 2.122753381729126 }, { "id": "identity_61743", "image": "FigureQA/images/67481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value of the Medium Aqua line?" }, { "from": "gpt", "value": "The answer is 61" } ], "clip_score": 26.890625, "loss": 2.723227024078369 }, { "id": "identity_278323", "image": "A-OKVQA/images/364075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do Giraffes typically rest? \n(A) museum\n(B) park\n(C) car lot\n(D) racetrack" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.671875, "loss": 3.9741828441619873 }, { "id": "identity_333200", "image": "DVQA/images/bar_train_00112475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have an accuracy above 3 in at least one dataset?" }, { "from": "gpt", "value": "The answer is eight" } ], "clip_score": 32.0, "loss": 4.136719703674316 }, { "id": "identity_227306", "image": "A-OKVQA/images/426026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, identify the season and describe three visual indicators that support your conclusion" }, { "from": "gpt", "value": "The answer is Fall, red/brown leaves, light jackets, trees losing leaves" } ], "clip_score": 18.609375, "loss": 3.1580562591552734 }, { "id": "identity_318336", "image": "AI2D/images/2847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do the Olfactory Nerves start?\nChoices:\n(A) Olfactory Bulb\n(B) Ovary\n(C) Heart\n(D) Mouth" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.8125, "loss": 3.9835057258605957 }, { "id": "identity_92210", "image": "IconQA/images/15388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the letter 'Q' in grid squares?" }, { "from": "gpt", "value": "The answer is 12 grid squares" } ], "clip_score": 31.375, "loss": 1.7191557884216309 }, { "id": "identity_262194", "image": "TQA/images/question_images/biomes_6569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which biome predominantly covers Russia? \n(A) Subtropical Rainforest\n(B) Taiga\n(C) Xeric Shrubland\n(D) Temperate Broadleafs Forest" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.015625, "loss": 4.049761772155762 }, { "id": "identity_82241", "image": "Geometry3K/images/1955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the two polygons are similar, what is the length of side TU?" }, { "from": "gpt", "value": "The answer is TU = 18 m" } ], "clip_score": 30.984375, "loss": 2.099830389022827 }, { "id": "identity_242952", "image": "TabMWP/images/38307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine the rate of change between each pair of points in the table and verify if the rate of change is constant. If it is not constant, calculate the average rate of change for the entire dataset" }, { "from": "gpt", "value": "The answer is The rate of change is not constant, average rate of change is -3.5" } ], "clip_score": 26.046875, "loss": 2.0270328521728516 }, { "id": "identity_76747", "image": "AI2D/images/1904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animal is a tertiary consumer in this food web?" }, { "from": "gpt", "value": "The answer is Snowy Owl" } ], "clip_score": 30.84375, "loss": 2.8418943881988525 }, { "id": "identity_182878", "image": "A-OKVQA/images/225299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any spectators watching the skier?" }, { "from": "gpt", "value": "The answer is Yes, there are spectators" } ], "clip_score": 27.140625, "loss": 2.657707929611206 }, { "id": "identity_95173", "image": "CLEVR-Math/images/CLEVR_train_009426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither metallic nor spherical?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.84375, "loss": 2.449185609817505 }, { "id": "identity_270795", "image": "A-OKVQA/images/380348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is designed to rock back and forth?\n(A) chair\n(B) cabinet\n(C) table\n(D) stool" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2578125, "loss": 4.173376560211182 }, { "id": "identity_206777", "image": "PMC-VQA/images/PMC6522109_F2_481353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of pathology in this tissue sample?" }, { "from": "gpt", "value": "The answer is No clear indications of pathology based on image alone" } ], "clip_score": 33.8125, "loss": 3.254716396331787 }, { "id": "identity_287859", "image": "PMC-VQA/images/PMC3457007_F5_156995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the field of view from section D?\nChoices:\n(A) Projections from the contralateral eye \n(B) Outlines of the entire dLGN (red) \n(C) Retinal projections to the optic tectum \n(D) Holes in the projections from the ipsilateral eye" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.65625, "loss": 4.02751350402832 }, { "id": "identity_279870", "image": "ScienceQA/images/11461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Peridotite falls under which category: mineral or rock?\nChoices:\n(A) mineral\n(B) rock" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.40625, "loss": 3.884478807449341 }, { "id": "identity_122301", "image": "A-OKVQA/images/16574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is being depicted in the image?" }, { "from": "gpt", "value": "The answer is A medieval-themed jousting tournament" } ], "clip_score": 28.484375, "loss": 2.7531397342681885 }, { "id": "identity_112738", "image": "TextVQA/images/d6d0a6b7e81a519b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition shown on the screen of the blue phone?" }, { "from": "gpt", "value": "The answer is Mostly Sunny" } ], "clip_score": 21.609375, "loss": 3.234435796737671 }, { "id": "identity_134724", "image": "DocVQA/images/kzbn0226_25.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific methodology was used for the peer review, according to the committee?" }, { "from": "gpt", "value": "The answer is Not determined by the committee" } ], "clip_score": 28.96875, "loss": 3.489927291870117 }, { "id": "identity_39298", "image": "ChartQA/images/two_col_83167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does any region have exactly one-third of the revenue share?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.9375, "loss": 3.095891237258911 }, { "id": "identity_273113", "image": "VQA2.0/images/315176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of open umbrellas?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.25, "loss": 2.092350959777832 }, { "id": "identity_312514", "image": "DVQA/images/bar_train_00171204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any negative values in this chart?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.3125, "loss": 3.5984275341033936 }, { "id": "identity_322711", "image": "GeoQA+/images/5394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, given that line \\( l_1 \\parallel l_2 \\) and \\( \\triangle ABC \\) is an equilateral triangle, if \\( \\angle 1 = 40^\\circ \\), what is the measure of \\( \\angle 2 \\)?\n\nChoices:\n(A) 60°\n(B) 80°\n(C) 90°\n(D) 100°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.46875, "loss": 2.8135242462158203 }, { "id": "identity_301344", "image": "TabMWP/images/16949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Samantha, who has some fruit trees growing in her yard, kept track of how many pieces of fruit she picked this year. What fraction of the pieces of fruit picked were plums? Simplify your answer." }, { "from": "gpt", "value": "The answer is 28/87" } ], "clip_score": 35.3125, "loss": 2.5439178943634033 }, { "id": "identity_227311", "image": "TQA/images/abc_question_images/human_system_eye_16130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure responsible for controlling the amount of light entering the eye and describe its location relative to the cornea and lens" }, { "from": "gpt", "value": "The answer is The iris is located between the cornea and the lens" } ], "clip_score": 30.546875, "loss": 1.9547419548034668 }, { "id": "identity_227014", "image": "VQA2.0/images/17472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of vehicles and animals are present on the street?" }, { "from": "gpt", "value": "The answer is Three types (car, motorcycle, cows)" } ], "clip_score": 25.421875, "loss": 2.527909517288208 }, { "id": "identity_77183", "image": "ScienceQA/images/6519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which longitude line intersects with the 70°N latitude line within the outlined area?" }, { "from": "gpt", "value": "The answer is 100°W" } ], "clip_score": 21.671875, "loss": 1.213294506072998 }, { "id": "identity_309319", "image": "VQA-AS/images/8928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the artwork a portrait?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.921875, "loss": 4.253300189971924 }, { "id": "identity_212135", "image": "A-OKVQA/images/499911.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fringed leather saddlebags are visible on the motorcycle?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.53125, "loss": 2.3212664127349854 }, { "id": "identity_63735", "image": "ScienceQA/images/21106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first stage in the life cycle of a pine tree as shown in the image?" }, { "from": "gpt", "value": "The answer is seedling" } ], "clip_score": 37.6875, "loss": 2.8369271755218506 }, { "id": "identity_326797", "image": "PMC-VQA/images/PMC6085685_Fig2_352459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is present in these canals? \nChoices:\n(A) they are narrowed \n(B) they are absent \n(C) they are widened \n(D) they are twisted" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.5625, "loss": 5.472019195556641 }, { "id": "identity_223713", "image": "A-OKVQA/images/526922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus involved in the accident on the left?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 29.390625, "loss": 3.704367160797119 }, { "id": "identity_55667", "image": "FigureQA/images/73217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of Aqua at x-axis label 50?" }, { "from": "gpt", "value": "The answer is 99.6" } ], "clip_score": 19.625, "loss": 2.622486114501953 }, { "id": "identity_220613", "image": "Super-CLEVR/images/superCLEVR_new_000778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have exactly three wheels?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 20.078125, "loss": 2.6139018535614014 }, { "id": "identity_85645", "image": "UniGeo/images/2405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if AB = 5 and BC = 10, and the height AM from A to BC is 4, what is the area of the parallelogram?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 33.625, "loss": 2.2311952114105225 }, { "id": "identity_165575", "image": "GeoQA+/images/2518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In parallelogram ABCD, if BE = 6 and ED = 10, what is the length of side AD?" }, { "from": "gpt", "value": "The answer is 16 units" } ], "clip_score": 36.71875, "loss": 2.4424242973327637 }, { "id": "identity_242419", "image": "DocVQA/images/fjyg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the date and who is the memorandum from?" }, { "from": "gpt", "value": "The answer is March 24, 1958, from LeRoy Voris, Executive Secretary" } ], "clip_score": 28.875, "loss": 2.5159695148468018 }, { "id": "identity_148691", "image": "DocVQA/images/myxd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the two-letter state abbreviation for the location of Duke University Medical Center?" }, { "from": "gpt", "value": "The answer is NC" } ], "clip_score": 21.703125, "loss": 3.5082366466522217 }, { "id": "identity_116970", "image": "Super-CLEVR/images/superCLEVR_new_000076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have wheels in the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 27.375, "loss": 2.1013412475585938 }, { "id": "identity_181345", "image": "CLEVR-Math/images/CLEVR_train_019515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cylinders are there in total?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.109375, "loss": 2.3008196353912354 }, { "id": "identity_89373", "image": "Geometry3K/images/1931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If YJ is parallel to VT and ZT = 18, what is the length of segment YT?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 31.453125, "loss": 1.9678432941436768 }, { "id": "identity_1531", "image": "DocVQA/images/xjjp0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the deadline for submission for March meeting?" }, { "from": "gpt", "value": "The answer is february 15" } ], "clip_score": 21.90625, "loss": 2.8031656742095947 }, { "id": "identity_45515", "image": "DocVQA/images/jnjm0223_60.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the country of the study titled 'Australia Steps'?" }, { "from": "gpt", "value": "The answer is Australia" } ], "clip_score": 14.875, "loss": 2.83333158493042 }, { "id": "identity_249279", "image": "ScienceQA/images/13668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 10 mL of each solution is mixed together, what will be the concentration of blue particles in the resulting solution?" }, { "from": "gpt", "value": "The answer is 0.25 particles/mL" } ], "clip_score": 34.625, "loss": 2.07637619972229 }, { "id": "identity_327392", "image": "AI2D/images/1506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the stage where water freezes and expands, causing the crack to widen?\nChoices:\n(A) E\n(B) B\n(C) G\n(D) H" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.375, "loss": 4.04100227355957 }, { "id": "identity_12804", "image": "TabMWP/images/26576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The receptionist at a doctor's office kept track of each patient's wait time. How many people waited for exactly 43 minutes?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.09375, "loss": 2.1266977787017822 }, { "id": "identity_48601", "image": "FigureQA/images/5733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the larger portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Orange Red" } ], "clip_score": 33.5625, "loss": 2.689403772354126 }, { "id": "identity_5192", "image": "TextVQA/images/3658ba6668c2255b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what letters are on the musician's shirt?" }, { "from": "gpt", "value": "The answer is pe" } ], "clip_score": 16.84375, "loss": 4.645306587219238 }, { "id": "identity_160668", "image": "GeoQA+/images/3868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of arc AC?" }, { "from": "gpt", "value": "The answer is 248°" } ], "clip_score": 29.921875, "loss": 2.907196521759033 }, { "id": "identity_152621", "image": "ScienceQA/images/5485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What characteristic do both the tissue and feather share regarding their texture?" }, { "from": "gpt", "value": "The answer is Both have a soft and delicate texture" } ], "clip_score": 35.28125, "loss": 2.9305505752563477 }, { "id": "identity_285000", "image": "IconQA/images/11636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangles present?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 29.296875, "loss": 1.9337654113769531 }, { "id": "identity_207275", "image": "PMC-VQA/images/PMC7561662_Fig13_274752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible abnormality in the image?" }, { "from": "gpt", "value": "The answer is No visible abnormality" } ], "clip_score": 26.4375, "loss": 3.076289653778076 }, { "id": "identity_301055", "image": "UniGeo/images/684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在圆中,OC垂直于AB,∠OBA=26°,则∠ADC的度数是()\nChoices:\n(A) 64°\n(B) 58°\n(C) 32°\n(D) 26°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.421875, "loss": 2.8178250789642334 }, { "id": "identity_50330", "image": "ChartQA/images/multi_col_978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the population aged 0-14 years in 2012?" }, { "from": "gpt", "value": "The answer is 14.63%" } ], "clip_score": 29.09375, "loss": 1.7411961555480957 }, { "id": "identity_30075", "image": "A-OKVQA/images/503832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is closest to the giraffe?\nChoices:\n(A) baby eel\n(B) hat\n(C) zebra\n(D) baby" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.65625, "loss": 4.221383094787598 }, { "id": "identity_303941", "image": "PMC-VQA/images/PMC5880870_Fig2_296385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the warm color indicate in the image?\nChoices:\n(A) Decreased rsFC in schizophrenia patients \n(B) Increased rsFC in schizophrenia patients \n(C) No rsFC in schizophrenia patients \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.015625, "loss": 4.3559889793396 }, { "id": "identity_51950", "image": "DocVQA/images/frjf0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of reporting services are mentioned in point 4?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.859375, "loss": 3.970439910888672 }, { "id": "identity_7507", "image": "TQA/images/question_images/waves_interactions_interference_9300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What degree out of phase causes a destructive interference?\nChoices:\n(A) 180 degree\n(B) 180 degrees\n(C) 100 degree\n(D) 18 degree" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.46875, "loss": 3.31274151802063 }, { "id": "identity_179480", "image": "TabMWP/images/4960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who contributed the highest amount for the gas costs and by how much more than the next highest contributor?" }, { "from": "gpt", "value": "The answer is Debbie, 140 more" } ], "clip_score": 34.1875, "loss": 3.0243818759918213 }, { "id": "identity_161590", "image": "UniGeo/images/3170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if the ratio of BF to BC is 1/3, what is the ratio of AF to AB?" }, { "from": "gpt", "value": "The answer is 1/3" } ], "clip_score": 34.53125, "loss": 1.4196763038635254 }, { "id": "identity_44230", "image": "FigureQA/images/58110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the y-axis on the graph?" }, { "from": "gpt", "value": "The answer is y-axis label" } ], "clip_score": 28.875, "loss": 2.6518497467041016 }, { "id": "identity_11513", "image": "TabMWP/images/24676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Abigail went on a camping trip and logged the number of miles she hiked each day. What is the mode of the numbers?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 32.5, "loss": 2.168696165084839 }, { "id": "identity_283715", "image": "ScienceQA/images/7035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly describes the transfer of thermal energy during this time: from the surroundings to each cake or from each cake to the surroundings?\nChoices:\n(A) the surroundings . . . each cake\n(B) each cake . . . the surroundings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.28125, "loss": 3.916703701019287 }, { "id": "identity_94194", "image": "TabMWP/images/15531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more did Brenner's Juices make on Friday compared to Saturday?" }, { "from": "gpt", "value": "The answer is $6" } ], "clip_score": 31.015625, "loss": 2.254385232925415 }, { "id": "identity_295891", "image": "A-OKVQA/images/455156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is this game taking place? \nOptions:\n(A) gym\n(B) park\n(C) backyard\n(D) stadium" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.984375, "loss": 4.428652763366699 }, { "id": "identity_236462", "image": "ScienceQA/images/5810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of these objects can be folded into different shapes, and which of these objects can be used to hold or secure something?" }, { "from": "gpt", "value": "The answer is Paper crane, Handkerchief, Belt" } ], "clip_score": 30.5, "loss": 1.543355941772461 }, { "id": "identity_45894", "image": "DVQA/images/bar_train_00151139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of people who prefer \"sugar\" between \"fit\" and \"shop\"?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.5625, "loss": 2.613640069961548 }, { "id": "identity_324160", "image": "TextVQA/images/0183848df6accae7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of this train?" }, { "from": "gpt", "value": "The answer is 50036" } ], "clip_score": 20.078125, "loss": 1.7824722528457642 }, { "id": "identity_265005", "image": "ChartQA/images/two_col_42769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the numerical value assigned to Jive?" }, { "from": "gpt", "value": "The answer is 1255" } ], "clip_score": 17.0625, "loss": 2.7378382682800293 }, { "id": "identity_93269", "image": "TabMWP/images/35970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of science articles read?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.0625, "loss": 2.5184390544891357 }, { "id": "identity_21656", "image": "ChartQA/images/4566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What % of the sub-Saharan African public thinks that the increasing use of the internet has had a good influence on Education?" }, { "from": "gpt", "value": "The answer is 79" } ], "clip_score": 31.90625, "loss": 2.6574971675872803 }, { "id": "identity_307481", "image": "DocVQA/images/nxvg0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When does it start?" }, { "from": "gpt", "value": "The answer is 7/1/70" } ], "clip_score": 16.59375, "loss": 1.824021577835083 }, { "id": "identity_32779", "image": "TextVQA/images/011c36b835cc0e31.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the small text say above 6?" }, { "from": "gpt", "value": "The answer is quartz" } ], "clip_score": 14.640625, "loss": 4.068807125091553 }, { "id": "identity_49100", "image": "DocVQA/images/tjxn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fax number of the supplier?" }, { "from": "gpt", "value": "The answer is 15124762832" } ], "clip_score": 24.78125, "loss": 2.2931363582611084 }, { "id": "identity_112439", "image": "A-OKVQA/images/428440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dragon heads are visible on the boat?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.078125, "loss": 4.2549543380737305 }, { "id": "identity_62068", "image": "DocVQA/images/llhm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the memorandum from?" }, { "from": "gpt", "value": "The answer is Dr. William J. Darby" } ], "clip_score": 28.390625, "loss": 3.438136339187622 }, { "id": "identity_147236", "image": "PlotQA/images/54154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph indicate the aid flow from Korea to the world in 1998?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.296875, "loss": 3.293792724609375 }, { "id": "identity_318428", "image": "ScienceQA/images/12788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the question that Mitchell's experiment can best address.\nChoices:\n(A) Do steel nails rust in fewer days when submerged in a larger amount of liquid compared to a smaller amount?\n(B) Do steel nails take fewer days to rust in water compared to vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.859375, "loss": 3.817237377166748 }, { "id": "identity_177327", "image": "TabMWP/images/10493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pizzas had exactly 20 pieces of pepperoni?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 30.46875, "loss": 1.9345778226852417 }, { "id": "identity_276835", "image": "TextVQA/images/1ea75b6f56c2a71c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is considered the king of social media?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 20.390625, "loss": 4.165093421936035 }, { "id": "identity_101848", "image": "CLEVR-Math/images/CLEVR_train_012685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red objects are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.75, "loss": 2.2511379718780518 }, { "id": "identity_68743", "image": "ScienceQA/images/15443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the state capital of the highlighted state?" }, { "from": "gpt", "value": "The answer is Harrisburg" } ], "clip_score": 29.34375, "loss": 3.1588833332061768 }, { "id": "identity_110582", "image": "PMC-VQA/images/PMC8727688_f2_156113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the images showing the upper or lower part of the human body?" }, { "from": "gpt", "value": "The answer is Lower part of the human body" } ], "clip_score": 25.421875, "loss": 1.9149070978164673 }, { "id": "identity_26112", "image": "GeoQA+/images/3158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,点C,D在⊙O上.若∠ABC=60°,则∠D的度数为()\nChoices:\n(A) 25°\n(B) 30°\n(C) 35°\n(D) 40°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.015625, "loss": 2.6405413150787354 }, { "id": "identity_175393", "image": "IconQA/images/21394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the marbles identical in size and pattern?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.234375, "loss": 3.0331013202667236 }, { "id": "identity_243937", "image": "PMC-VQA/images/PMC7417211_ece36552-fig-0001_232164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate diameter of the white flowers seen in the foreground of the photograph?" }, { "from": "gpt", "value": "The answer is approximately 2 mm" } ], "clip_score": 17.890625, "loss": 2.7435250282287598 }, { "id": "identity_6397", "image": "GeoQA+/images/689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点D是△ABC的边BC上任意一点,点E、F分别是线段AD、CE的中点,若△ABC的面积为24,则△BEF的面积等于()\nChoices:\n(A) 12\n(B) 8\n(C) 6\n(D) 9.6" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0, "loss": 2.673053503036499 }, { "id": "identity_281572", "image": "AI2D/images/580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject matter of the diagram provided?\nChoices:\n(A) The life cycle of a lizard\n(B) The life cycle of a frog\n(C) The life cycle of a newt\n(D) The life cycle of a fish" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.125, "loss": 3.8156142234802246 }, { "id": "identity_8886", "image": "UniGeo/images/1625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线l_{1}∥l_{2}∥l_{3},直线AC分别交l_{1},l_{2},l_{3}于点A,B,C,直线DF分别交l_{1},l_{2},l_{3}于点D,E,F,AC与DF相交于点H,如果AB=5,BH=1,CH=2,那么的值等于()\nChoices:\n(A) \\frac{1}{5}\n(B) \\frac{1}{3}\n(C) \\frac{2}{5}\n(D) \\frac{3}{5}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.9375, "loss": 2.4392080307006836 }, { "id": "identity_58470", "image": "FigureQA/images/38032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many segments are represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 35.4375, "loss": 2.4882888793945312 }, { "id": "identity_131126", "image": "Super-CLEVR/images/superCLEVR_new_001848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located between the yellow car and the teal sedan?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 24.71875, "loss": 5.010812282562256 }, { "id": "identity_226801", "image": "IconQA/images/70113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sequence of transformations (flip, turn, slide) has been applied to the letter 'G' to obtain the letter on the right?" }, { "from": "gpt", "value": "The answer is Flip horizontally" } ], "clip_score": 30.46875, "loss": 2.468392848968506 }, { "id": "identity_140774", "image": "MapQA/images/map_9612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average burn number range for the states in the Midwest region?" }, { "from": "gpt", "value": "The answer is 51-55" } ], "clip_score": 33.5625, "loss": 1.8127620220184326 }, { "id": "identity_89620", "image": "UniGeo/images/2552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, what is the perimeter of the rhombus?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 37.84375, "loss": 2.1676089763641357 }, { "id": "identity_170848", "image": "IconQA/images/15515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the three friends went to soccer practice consecutively, what is the total time span?" }, { "from": "gpt", "value": "The answer is 2 hours" } ], "clip_score": 20.5625, "loss": 2.5890841484069824 }, { "id": "identity_134195", "image": "DocVQA/images/klvh0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pages are included in this transmission?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 22.90625, "loss": 2.551765203475952 }, { "id": "identity_289356", "image": "ScienceQA/images/12805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Arizona?\nChoices:\n(A) Tucson\n(B) Fairbanks\n(C) Phoenix\n(D) Boise" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.109375, "loss": 4.146311283111572 }, { "id": "identity_116040", "image": "PMC-VQA/images/PMC7319815_ytaa086-F1_200240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the frequency of the ultrasound probe used in this echocardiogram?" }, { "from": "gpt", "value": "The answer is 2.7 MHz" } ], "clip_score": 31.6875, "loss": 1.9372689723968506 }, { "id": "identity_208000", "image": "PMC-VQA/images/PMC6162434_cancers-10-00313-f001_374469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the intensity of the abnormality compared to the surrounding tissue?" }, { "from": "gpt", "value": "The answer is Lower intensity" } ], "clip_score": 26.59375, "loss": 4.431711673736572 }, { "id": "identity_300206", "image": "TextVQA/images/314359e5b2eedc3b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the whiskey on the far left?" }, { "from": "gpt", "value": "The answer is chivas" } ], "clip_score": 34.6875, "loss": 4.461698532104492 }, { "id": "identity_69800", "image": "TQA/images/question_images/seasons_2629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which season receives direct rays from the sun on December 22nd as per the diagram?" }, { "from": "gpt", "value": "The answer is Winter" } ], "clip_score": 31.171875, "loss": 2.755115509033203 }, { "id": "identity_18146", "image": "TabMWP/images/6251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Naomi just missed the 9.45 A.M. train at Richmond. What time is the next train?\nChoices:\n(A) 9:45 A.M.\n(B) 11:45 A.M.\n(C) 4:15 P.M.\n(D) 10:00 A.M." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.5625, "loss": 3.123838424682617 }, { "id": "identity_296849", "image": "VQA2.0/images/186233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pillars on the building?" }, { "from": "gpt", "value": "The answer is cannot tell" } ], "clip_score": 16.734375, "loss": 4.720118999481201 }, { "id": "identity_304065", "image": "TQA/images/abc_question_images/aquifers_16532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part of the diagram labeled F?\nChoices:\n(A) CONFINED AQUIFER \n(B) WATER TABLE WELL\n(C) FLOWING ARTESIAN WELL\n(D) UNCONFINED AQUIFER" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 39.03125, "loss": 4.244899272918701 }, { "id": "identity_152668", "image": "ScienceQA/images/12586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the Fluoromethane molecule, what is the hybridization state of the carbon atom?" }, { "from": "gpt", "value": "The answer is sp3 hybridization" } ], "clip_score": 29.59375, "loss": 1.4848685264587402 }, { "id": "identity_61930", "image": "DocVQA/images/ylnf0227_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the topic of Section I under Section C?" }, { "from": "gpt", "value": "The answer is Group Activities to Examine Your Own Food Behavior" } ], "clip_score": 19.0625, "loss": 1.7080391645431519 }, { "id": "identity_9605", "image": "DocVQA/images/pjbv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where will the session be held?" }, { "from": "gpt", "value": "The answer is Universidad Autonoma de Guadalajara" } ], "clip_score": 16.203125, "loss": 2.5908913612365723 }, { "id": "identity_35845", "image": "PMC-VQA/images/PMC4743604_F0002_471273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the scale bar in the TEM image refer to? \nChoices:\n(A) Size of platelets \n(B) Magnification of the image \n(C) Thickness of the sample \n(D) Volume of the PRP " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.90625, "loss": 5.085293769836426 }, { "id": "identity_270459", "image": "TQA/images/question_images/human_system_digestive_3714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of salivary glands in the human body?\nChoices:\n(A) 3\n(B) 2\n(C) 1\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.78125, "loss": 3.1889548301696777 }, { "id": "identity_189862", "image": "TextVQA/images/939c816006f61921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the taxi in the foreground?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 18.234375, "loss": 2.570263385772705 }, { "id": "identity_32749", "image": "VQA-AS/images/11879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the little girl petting the top of the dog?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.46875, "loss": 3.703413486480713 }, { "id": "identity_180897", "image": "IconQA/images/73690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you disconnect the chain of paper clips, how many individual paper clips will you have?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.4375, "loss": 2.3160560131073 }, { "id": "identity_17505", "image": "Geometry3K/images/1264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find A F.\nChoices:\n(A) 9\n(B) 10.8\n(C) 18\n(D) 21.6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.296875, "loss": 3.443042039871216 }, { "id": "identity_254280", "image": "PMC-VQA/images/PMC6889661_Fig2_77609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences between Sagittal T1-weighted and Axial T2-weighted MRI images, and how can you identify them in the given images?" }, { "from": "gpt", "value": "The answer is Axial T2-weighted images show horizontal slices with bright fluids" } ], "clip_score": 33.1875, "loss": 3.126465320587158 }, { "id": "identity_203042", "image": "VQA2.0/images/442774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many monitors are there in the room?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 22.75, "loss": 1.8844128847122192 }, { "id": "identity_248767", "image": "ChartQA/images/05411753006467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the total mismanaged plastic waste of the Dominican Republic and the combined total of Ireland and Guadeloupe?" }, { "from": "gpt", "value": "The answer is 203,124 tonnes" } ], "clip_score": 35.875, "loss": 2.2510464191436768 }, { "id": "identity_226848", "image": "PMC-VQA/images/PMC7693825_jcm-09-03516-f002_306867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures can be identified in the CT-scan, and how can you differentiate between the native mandible and the postresection area based on their appearance and location?" }, { "from": "gpt", "value": "The answer is Native mandible, postresection area, surgical plates, and screws" } ], "clip_score": 31.046875, "loss": 2.3168065547943115 }, { "id": "identity_35932", "image": "TabMWP/images/30541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: An author kept a log of how many pages he wrote in the past 6 days. What is the mean of the numbers?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 32.625, "loss": 2.924220323562622 }, { "id": "identity_143412", "image": "DocVQA/images/xkvw0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many main topics are listed on the agenda?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.4375, "loss": 2.471966028213501 }, { "id": "identity_273141", "image": "UniGeo/images/2155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, D is a point inside triangle ABC, BD is perpendicular to CD, AD = 7, BD = 4, CD = 3, E, F, G, H are the midpoints of AB, BD, CD, AC respectively. What is the perimeter of quadrilateral EFGH?\nChoices:\n(A) 12\n(B) 14\n(C) 24\n(D) 21" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.875, "loss": 2.484691619873047 }, { "id": "identity_298343", "image": "VQA-AS/images/16662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any fish present in the pond?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 19.984375, "loss": 3.5463852882385254 }, { "id": "identity_176613", "image": "TabMWP/images/28135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Maya's income that went to other taxes?" }, { "from": "gpt", "value": "The answer is 7.65%" } ], "clip_score": 29.734375, "loss": 1.53733229637146 }, { "id": "identity_139468", "image": "DVQA/images/bar_train_00167366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the bars labeled \"lip\" in the graph?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 21.828125, "loss": 2.3429064750671387 }, { "id": "identity_62347", "image": "DocVQA/images/pzlf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the topic of the presentation for the American Heart Association's Fourth Science Writers Forum?" }, { "from": "gpt", "value": "The answer is Salt and Hypertension" } ], "clip_score": 22.828125, "loss": 1.9230577945709229 }, { "id": "identity_291668", "image": "PMC-VQA/images/PMC7278007_diagnostics-10-00299-f001_186071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the example, which three directions are the CT images presented in?\nChoices:\n(A) frontal, sagittal, and transversal\n(B) anterior, posterior, and inferior\n(C) transversal, sagittal, and coronal\n(D) lateral, superior, and ventral" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.90625, "loss": 3.660568952560425 }, { "id": "identity_330969", "image": "GeoQA+/images/4475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,矩形ABCD中,AB:AD=2,点E为AB的中点,点F为EC上一个动点,点P为DF的中点,连接PB.若PB的最小值为5√{2},则AD的值为多少?\nChoices:\n(A) 5\n(B) 6\n(C) 7\n(D) 8" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.390625, "loss": 2.4383914470672607 }, { "id": "identity_266467", "image": "VizWiz/images/VizWiz_train_00012211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you determine the quantity of eggs present?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 22.3125, "loss": 2.6369404792785645 }, { "id": "identity_237085", "image": "DocVQA/images/ffjv0228_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the recommended control methods for all the weeds listed in the document?" }, { "from": "gpt", "value": "The answer is Pyramin, TCA, Dowpon, Betanal, Herbicide 273, Roneet" } ], "clip_score": 27.015625, "loss": 3.791785717010498 }, { "id": "identity_122421", "image": "VQA-RAD/images/synpic35914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of bowel obstruction?" }, { "from": "gpt", "value": "The answer is No, there are no signs of bowel obstruction" } ], "clip_score": 24.609375, "loss": 1.4478181600570679 }, { "id": "identity_305609", "image": "ChartQA/images/multi_col_100046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall value in 2014?" }, { "from": "gpt", "value": "The answer is 2700" } ], "clip_score": 20.328125, "loss": 2.2742607593536377 }, { "id": "identity_47180", "image": "DocVQA/images/pklm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the deadline for early hotel reservations?" }, { "from": "gpt", "value": "The answer is October 27, 1978" } ], "clip_score": 25.46875, "loss": 1.4539839029312134 }, { "id": "identity_209623", "image": "VQA-AS/images/140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the slide?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.734375, "loss": 3.492124557495117 }, { "id": "identity_309047", "image": "A-OKVQA/images/441736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is in the most danger?\nChoices:\n(A) left person\n(B) right person\n(C) farthest person\n(D) rear person" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.40625, "loss": 3.885286331176758 }, { "id": "identity_99223", "image": "IconQA/images/10814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sides does each shape have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.765625, "loss": 1.787199854850769 }, { "id": "identity_174349", "image": "TabMWP/images/28720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of books read by Olivia's classmates?" }, { "from": "gpt", "value": "The answer is 34.5" } ], "clip_score": 33.59375, "loss": 1.8663278818130493 }, { "id": "identity_197423", "image": "A-OKVQA/images/206082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cabinet doors are visible in the kitchen?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.1875, "loss": 2.2304389476776123 }, { "id": "identity_254511", "image": "DocVQA/images/nycd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the institution where Dr. Robert Shank works, and what is the date of the letter?" }, { "from": "gpt", "value": "The answer is Dept. of Medicine, Washington Univ., St. Louis, Mo., May 26, 1970" } ], "clip_score": 28.140625, "loss": 1.1303131580352783 }, { "id": "identity_118376", "image": "PMC-VQA/images/PMC6199147_FIG2_385376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a medical device in the image?" }, { "from": "gpt", "value": "The answer is No evidence of a medical device" } ], "clip_score": 22.84375, "loss": 3.041560173034668 }, { "id": "identity_81993", "image": "UniGeo/images/1024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠ADC = 33°, what is the measure of ∠AOB?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 31.1875, "loss": 2.1295711994171143 }, { "id": "identity_319466", "image": "TextVQA/images/4748e3595f9984b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What company is the red banner displaying?" }, { "from": "gpt", "value": "The answer is santand" } ], "clip_score": 15.3828125, "loss": 4.353243827819824 }, { "id": "identity_139487", "image": "PlotQA/images/101368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what factor did the number of community health workers increase from 2004 to 2007?" }, { "from": "gpt", "value": "The answer is 3.5" } ], "clip_score": 24.515625, "loss": 2.222581624984741 }, { "id": "identity_216666", "image": "A-OKVQA/images/455384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What channel is the advertised TV show airing on?" }, { "from": "gpt", "value": "The answer is MyTV" } ], "clip_score": 17.84375, "loss": 3.9829161167144775 }, { "id": "identity_312502", "image": "FigureQA/images/53769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.057625770568848 }, { "id": "identity_231450", "image": "DVQA/images/bar_train_00161699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of the values of all bars in the groups that contain at least one bar with a value greater than 4?" }, { "from": "gpt", "value": "The answer is 69" } ], "clip_score": 24.8125, "loss": 2.238786220550537 }, { "id": "identity_117746", "image": "VizWiz/images/VizWiz_train_00018025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many numerals are displayed on the clock face?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 27.609375, "loss": 1.9033688306808472 }, { "id": "identity_111818", "image": "TextVQA/images/5540bbfdacc9e5d5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the natural setting surrounding the train?" }, { "from": "gpt", "value": "The answer is Mountains and trees" } ], "clip_score": 24.40625, "loss": 3.5105111598968506 }, { "id": "identity_336034", "image": "PMC-VQA/images/PMC6881988_Fig3_75230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the CT image show in the right lower lung area?\nChoices:\n(A) Peripheral scarring from previous infarcts\n(B) Thin wall cavity consistent with pulmonary infarction\n(C) Reduced vessels in the darker lung\n(D) None of the above options are correct." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.3125, "loss": 4.486156940460205 }, { "id": "identity_13620", "image": "Geometry3K/images/221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of \\triangle C P D if the perimeter of \\triangle B P A is 12, B M = \\sqrt { 13 }, and C N = 3 \\sqrt { 13 }.\nChoices:\n(A) 6\n(B) 12\n(C) 24\n(D) 36" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.375, "loss": 2.3893349170684814 }, { "id": "identity_220985", "image": "VQA2.0/images/251591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number visible on the hydrant?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 29.4375, "loss": 2.9526095390319824 }, { "id": "identity_11070", "image": "PMC-VQA/images/PMC4246683_f3-ol-09-01-0131_339948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the location of the cystic lesion? \nChoices:\n(A) Right maxillary sinus \n(B) Left maxillary sinus \n(C) Right frontal sinus \n(D) Left frontal sinus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 4.506664276123047 }, { "id": "identity_70188", "image": "TQA/images/abc_question_images/human_system_muscular_16157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label points to the deltoid muscle?" }, { "from": "gpt", "value": "The answer is J" } ], "clip_score": 27.703125, "loss": 2.8801379203796387 }, { "id": "identity_222893", "image": "Super-CLEVR/images/superCLEVR_new_001393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vehicle in the image that is not touching the ground?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.640625, "loss": 3.1190857887268066 }, { "id": "identity_307419", "image": "Geometry3K/images/404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of an angle. \nChoices:\n(A) 30\n(B) 50\n(C) 60\n(D) 80" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.59375, "loss": 2.997572660446167 }, { "id": "identity_25773", "image": "DVQA/images/bar_train_00090075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the most units in any shop?" }, { "from": "gpt", "value": "The answer is random" } ], "clip_score": 31.90625, "loss": 3.689997911453247 }, { "id": "identity_300475", "image": "AI2D/images/2114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many creatures depend on grass for sustenance?\nChoices:\n(A) 2\n(B) 5\n(C) 1\n(D) 3" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2265625, "loss": 3.3731002807617188 }, { "id": "identity_148188", "image": "PlotQA/images/48414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in health expenditure between 1995 and 1996 for Thailand?" }, { "from": "gpt", "value": "The answer is Approximately 0.5" } ], "clip_score": 24.78125, "loss": 2.5419063568115234 }, { "id": "identity_247078", "image": "GeoQA+/images/135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,在▱ABCD中,AB=10cm,AB边上的高DH=4cm,BC=6cm,求平行四边形ABCD的面积。" }, { "from": "gpt", "value": "The answer is 40 cm²" } ], "clip_score": 29.0625, "loss": 1.7177138328552246 }, { "id": "identity_183012", "image": "PMC-VQA/images/PMC4965257_F2_57008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lateral ventricles symmetrical in this MRI image?" }, { "from": "gpt", "value": "The answer is Yes, the lateral ventricles are symmetrical" } ], "clip_score": 35.8125, "loss": 1.453125238418579 }, { "id": "identity_20795", "image": "VQA-AS/images/8998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is the man staying warm?" }, { "from": "gpt", "value": "The answer is fire" } ], "clip_score": 25.46875, "loss": 3.263043165206909 }, { "id": "identity_293284", "image": "VQA2.0/images/372121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this an all-you-can-eat dining option?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.96875, "loss": 3.003225564956665 }, { "id": "identity_76099", "image": "ScienceQA/images/11313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the Park in the grid?" }, { "from": "gpt", "value": "The answer is B3" } ], "clip_score": 30.265625, "loss": 2.4268081188201904 }, { "id": "identity_294929", "image": "PMC-VQA/images/PMC8689062_F1_143318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In photographs (C-E), which brain regions were captured? \nChoices:\n(A) frontal lobe and cerebellum\n(B) parietal cortex and hippocampus\n(C) occipital lobe and amygdala\n(D) temporal lobe and basal ganglia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.625, "loss": 3.7745578289031982 }, { "id": "identity_130497", "image": "VQA2.0/images/34469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any equipment visible in the image that is used for horse care or riding?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.484375, "loss": 2.7530441284179688 }, { "id": "identity_261255", "image": "PMC-VQA/images/PMC4592368_Fig1_429401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of T2-weighted FLAIR images compared to T1-weighted MPRAGE images, and how can these features be used to identify the type of imaging used in the middle column images?" }, { "from": "gpt", "value": "The answer is T2-weighted FLAIR images show dark CSF and bright lesions" } ], "clip_score": 27.484375, "loss": 2.2820217609405518 }, { "id": "identity_24638", "image": "DocVQA/images/pjxf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the President of Pennsylvania Dietetic Association?" }, { "from": "gpt", "value": "The answer is Sister Maude Behrman" } ], "clip_score": 28.015625, "loss": 3.9440319538116455 }, { "id": "identity_116332", "image": "A-OKVQA/images/578137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many picnic tables are visible in the image?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 28.75, "loss": 2.20512056350708 }, { "id": "identity_318029", "image": "FigureQA/images/97764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid at the top value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.296875, "loss": 4.470717430114746 }, { "id": "identity_256033", "image": "A-OKVQA/images/136458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices, including the laptop, are visible in the image?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 25.796875, "loss": 2.2845611572265625 }, { "id": "identity_17992", "image": "TabMWP/images/11419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Mabel, a huge fan, tracked the scores on her favorite radio quiz show. How many people scored more than 3?" }, { "from": "gpt", "value": "The answer is 29" } ], "clip_score": 37.34375, "loss": 2.4239022731781006 }, { "id": "identity_254768", "image": "IconQA/images/86751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows and columns of ovals are there, and what is the total number of ovals?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.875, "loss": 3.44689679145813 }, { "id": "identity_8586", "image": "Super-CLEVR/images/superCLEVR_new_000724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the large object behind the blue car the same as the matte wagon?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 23.625, "loss": 5.413651943206787 }, { "id": "identity_87859", "image": "UniGeo/images/1862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If OM = 4 and ON = 5, what is the length of chord AB?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.09375, "loss": 2.350499391555786 }, { "id": "identity_253397", "image": "VQA2.0/images/474550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the establishment and what city is it located in?" }, { "from": "gpt", "value": "The answer is Doughnut Plant, New York City" } ], "clip_score": 30.640625, "loss": 1.6948388814926147 }, { "id": "identity_69351", "image": "TQA/images/question_images/hydrocarbons_8091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the molecular formula for propane as shown in the image?" }, { "from": "gpt", "value": "The answer is C3H8" } ], "clip_score": 35.0625, "loss": 1.0906093120574951 }, { "id": "identity_252881", "image": "DocVQA/images/nyvx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the Ideal Truck Lines Inc. office in Denver, Colorado, and who is the person associated with it?" }, { "from": "gpt", "value": "The answer is 3930 Blake Street, Denver, Colorado 80205, Mr. J. Dudley Lewis" } ], "clip_score": 21.625, "loss": 2.0643503665924072 }, { "id": "identity_175065", "image": "IconQA/images/104248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Connor's watch is 15 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 8:15" } ], "clip_score": 24.171875, "loss": 1.7620173692703247 }, { "id": "identity_183314", "image": "VQA2.0/images/222036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the shorts of the person on the right in the foreground?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 21.203125, "loss": 2.9596664905548096 }, { "id": "identity_258358", "image": "ChartQA/images/two_col_60940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of sentiments that are not negative?" }, { "from": "gpt", "value": "The answer is 48.37%" } ], "clip_score": 29.53125, "loss": 1.9015854597091675 }, { "id": "identity_294801", "image": "VQA-AS/images/297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the fireplace on fire?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.34375, "loss": 3.8385109901428223 }, { "id": "identity_316657", "image": "DocVQA/images/kkhf0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which department does Dr. H.S. Swingle belong?" }, { "from": "gpt", "value": "The answer is department of zoology" } ], "clip_score": 23.140625, "loss": 2.4131510257720947 }, { "id": "identity_246975", "image": "DocVQA/images/jhjh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the manager of the TV station located at 1402 First Ave., Scottsbluff, Neb. 69361, and what is the name of the news director at the same address?" }, { "from": "gpt", "value": "The answer is William G. Walter and William J. Brennan" } ], "clip_score": 29.21875, "loss": 2.2352359294891357 }, { "id": "identity_2225", "image": "FigureQA/images/55414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.034512042999268 }, { "id": "identity_39254", "image": "FigureQA/images/87128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis label value does the Periwinkle curve intersect the Steel Blue curve?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 27.5625, "loss": 1.9858853816986084 }, { "id": "identity_331565", "image": "PMC-VQA/images/PMC4960055_Fig3_55233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What procedure was performed on the patient?\nChoices:\n(A) Fixation of the tibia.\n(B) Open heart surgery.\n(C) Fixation of the femur.\n(D) Lumbar puncture." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.78125, "loss": 3.8356857299804688 }, { "id": "identity_229494", "image": "A-OKVQA/images/78477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the types of fruits in the image and determine the total number of each type. Then, calculate the ratio of lemons to the total number of fruits" }, { "from": "gpt", "value": "The answer is 5/14" } ], "clip_score": 28.21875, "loss": 3.9160985946655273 }, { "id": "identity_184885", "image": "PMC-VQA/images/PMC9078808_fig4_274145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is used to annotate the predicted area in the CEUS image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.578125, "loss": 3.612593173980713 }, { "id": "identity_10454", "image": "IconQA/images/78608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) certain\n(B) probable\n(C) impossible\n(D) unlikely" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.015625, "loss": 4.010798454284668 }, { "id": "identity_239861", "image": "FigureQA/images/36308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Aqua and Khaki?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 35.375, "loss": 1.7191636562347412 }, { "id": "identity_160586", "image": "GeoQA+/images/2373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle 4 is supplementary to angle 3, what is the measure of angle 4?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 31.171875, "loss": 2.1979503631591797 }, { "id": "identity_259599", "image": "PMC-VQA/images/PMC9327211_Fig1_356939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to identify the hyper-metabolic lymph nodes in the patient, and explain why this technique is preferred over the other options for detecting hyper-metabolic activity?" }, { "from": "gpt", "value": "The answer is PET is preferred for detecting hyper-metabolic activity" } ], "clip_score": 27.78125, "loss": 2.3210697174072266 }, { "id": "identity_3983", "image": "DocVQA/images/yxbv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the Union League Club Honor Roll of Big Apple Boosters?" }, { "from": "gpt", "value": "The answer is Lincoln Hall" } ], "clip_score": 32.9375, "loss": 2.5354630947113037 }, { "id": "identity_184725", "image": "PMC-VQA/images/PMC7960597_Fig1_391687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of soft tissue damage?" }, { "from": "gpt", "value": "The answer is No visible signs of soft tissue damage" } ], "clip_score": 27.1875, "loss": 2.531102418899536 }, { "id": "identity_89076", "image": "Geometry3K/images/162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BFG in the circle?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 36.65625, "loss": 1.98659086227417 }, { "id": "identity_105524", "image": "A-OKVQA/images/132816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the photo?" }, { "from": "gpt", "value": "The answer is Likely cool or overcast weather" } ], "clip_score": 20.265625, "loss": 3.0217370986938477 }, { "id": "identity_310278", "image": "TextVQA/images/44e7fef829eb2524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the one defeating the giant?" }, { "from": "gpt", "value": "The answer is jack" } ], "clip_score": 21.703125, "loss": 4.1185712814331055 }, { "id": "identity_279602", "image": "PMC-VQA/images/PMC7503285_ijms-21-06163-f007_257397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which side was the control used?\nChoices:\n(A) Right side\n(B) Left side\n(C) Both sides\n(D) Neither side" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.9140625, "loss": 4.260742664337158 }, { "id": "identity_79025", "image": "Geometry3K/images/1754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC in triangle ABC?" }, { "from": "gpt", "value": "The answer is 53.13" } ], "clip_score": 33.21875, "loss": 2.122321605682373 }, { "id": "identity_302430", "image": "VQA-AS/images/16377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this ball?" }, { "from": "gpt", "value": "The answer is black & white" } ], "clip_score": 16.78125, "loss": 3.0563979148864746 }, { "id": "identity_158467", "image": "ScienceQA/images/10515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two states border the highlighted state to the north?" }, { "from": "gpt", "value": "The answer is Virginia and Tennessee" } ], "clip_score": 31.984375, "loss": 2.7161178588867188 }, { "id": "identity_260548", "image": "FigureQA/images/40514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the area of Light Green to the area of Khaki?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 36.09375, "loss": 1.5933516025543213 }, { "id": "identity_261565", "image": "A-OKVQA/images/16403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the individuals at the back waiting for?\nChoices:\n(A) cruise boat\n(B) bus\n(C) cab\n(D) van" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.71875, "loss": 4.164218425750732 }, { "id": "identity_148352", "image": "DocVQA/images/ftmf0024_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When are the 'Kick-Off Dinners / Meetings' scheduled?" }, { "from": "gpt", "value": "The answer is Dec. 16, 17" } ], "clip_score": 23.703125, "loss": 1.9900622367858887 }, { "id": "identity_311628", "image": "Geometry3K/images/1379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If K L M N is a parallelogram, find this value.\nChoices:\n(A) 4\n(B) 5\n(C) 12\n(D) 26" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.46875, "loss": 2.4940381050109863 }, { "id": "identity_5067", "image": "DVQA/images/bar_train_00073039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest accuracy reported in the whole chart?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.53125, "loss": 3.1459217071533203 }, { "id": "identity_161727", "image": "GeoQA+/images/3709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the measure of angle AOB is 70 degrees, what is the length of arc AC?" }, { "from": "gpt", "value": "The answer is (70/360) * circumference" } ], "clip_score": 30.78125, "loss": 1.8922594785690308 }, { "id": "identity_207067", "image": "PMC-VQA/images/PMC4209069_Fig4_330418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is visible in the center of the image?" }, { "from": "gpt", "value": "The answer is Third ventricle" } ], "clip_score": 25.9375, "loss": 2.981886386871338 }, { "id": "identity_84214", "image": "UniGeo/images/1763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠A = 120°, BD is the angle bisector of ∠ABC, and DE is perpendicular to BC, what is the ratio of the area of triangle ABD to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 32.71875, "loss": 1.4127330780029297 }, { "id": "identity_203683", "image": "PMC-VQA/images/PMC7589685_cancers-12-03055-f006_282970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the scan in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Transverse (axial) orientation" } ], "clip_score": 29.484375, "loss": 3.344209909439087 }, { "id": "identity_262649", "image": "PMC-VQA/images/PMC7373548_F2_217581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the X-ray report, where was the fracture identified?\nChoices:\n(A) Left Inferior Process C2 and C3\n(B) Superior Process of C6 and C7\n(C) Both A and B are accurate\n(D) Right Inferior Process C2 and C3" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.703125, "loss": 3.902193546295166 }, { "id": "identity_13128", "image": "TQA/images/question_images/cellular_respiration_6622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is considered a by-product during photosynthesis?\nChoices:\n(A) carbon dioxide\n(B) glucose\n(C) oxygen\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.125, "loss": 4.066286563873291 }, { "id": "identity_207355", "image": "VQA2.0/images/343322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any sports-related words on the building signage?" }, { "from": "gpt", "value": "The answer is Yes, \"SPORT" } ], "clip_score": 17.359375, "loss": 3.931103229522705 }, { "id": "identity_122286", "image": "VQA-RAD/images/synpic57813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the field of view (FOV) size of this MRI image?" }, { "from": "gpt", "value": "The answer is 120 mm" } ], "clip_score": 33.84375, "loss": 1.5735201835632324 }, { "id": "identity_153683", "image": "ScienceQA/images/10186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary composition of the planet with the largest volume?" }, { "from": "gpt", "value": "The answer is gas" } ], "clip_score": 29.5, "loss": 3.6667652130126953 }, { "id": "identity_282278", "image": "A-OKVQA/images/297419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is in close proximity to the flowers among the following options: dog, rooster, cat, or cow?\nChoices:\n(A) dog\n(B) rooster\n(C) cat\n(D) cow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.390625, "loss": 4.09549617767334 }, { "id": "identity_286116", "image": "Geometry3K/images/490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of y that would make the quadrilateral a parallelogram.\nOptions:\n(A) 12\n(B) 24\n(C) 48\n(D) 72" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.375, "loss": 2.931011915206909 }, { "id": "identity_281054", "image": "UniGeo/images/531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Liang Liang made a conical paper hat for the 10th Lanzhou City Art Festival for primary and secondary school students. The base radius is 9 and the height is 12. If there is no loss, what is the lateral surface area of this conical paper hat?\nChoices:\n(A) 90π\n(B) 120π\n(C) 135π\n(D) 150π" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.875, "loss": 2.32869291305542 }, { "id": "identity_307760", "image": "MapQA/images/map_11209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in this region?" }, { "from": "gpt", "value": "The answer is 67122-67710" } ], "clip_score": 24.3125, "loss": 2.594538688659668 }, { "id": "identity_246706", "image": "DocVQA/images/rmnp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the University of Minnesota's School of Public Health mentioned in the letterhead?" }, { "from": "gpt", "value": "The answer is 1325 Mayo Memorial Building, Minneapolis, Minnesota 55455" } ], "clip_score": 25.703125, "loss": 1.443070888519287 }, { "id": "identity_24587", "image": "TabMWP/images/28411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Eve need to buy a package of purple streamers and a red balloon?" }, { "from": "gpt", "value": "The answer is 1.26" } ], "clip_score": 31.5625, "loss": 2.416423797607422 }, { "id": "identity_235742", "image": "FigureQA/images/2009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the largest and smallest segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 27.140625, "loss": 1.7177215814590454 }, { "id": "identity_39722", "image": "DVQA/images/bar_train_00024741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bars in the chart?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.734375, "loss": 2.457244873046875 }, { "id": "identity_96784", "image": "IconQA/images/84127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two marbles are selected without looking, what is the probability that both are purple?" }, { "from": "gpt", "value": "The answer is 1/2 or 50%" } ], "clip_score": 31.640625, "loss": 1.0568372011184692 }, { "id": "identity_36899", "image": "PMC-VQA/images/PMC4921494_F2_45002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the thickness of the cross-sections shown in the images? \nChoices:\n(A) 1 μm \n(B) 5 μm \n(C) 10 μm \n(D) 15 μm " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.015625, "loss": 4.141472339630127 }, { "id": "identity_117617", "image": "Super-CLEVR/images/superCLEVR_new_001807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image that are the same color as the scooter?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.484375, "loss": 3.33992338180542 }, { "id": "identity_273337", "image": "FigureQA/images/1245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Dark Orange or Turquoise?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.921875, "loss": 3.8640220165252686 }, { "id": "identity_40953", "image": "ChartQA/images/multi_col_60080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the age group 45 to 59 years have a higher percentage of 'Yes' responses compared to 'No' responses?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.890625, "loss": 3.0396625995635986 }, { "id": "identity_258780", "image": "VQA2.0/images/372768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visual indicators that suggest the pizza is fully cooked, and how do they compare to the characteristics of an undercooked pizza?" }, { "from": "gpt", "value": "The answer is Golden-brown crust, melted cheese, cooked toppings, firm bottom" } ], "clip_score": 21.75, "loss": 2.610097646713257 }, { "id": "identity_94505", "image": "TabMWP/images/23972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cheese consumption per capita for the year 2006 for the three countries listed?" }, { "from": "gpt", "value": "The answer is 10.67 kg" } ], "clip_score": 34.875, "loss": 1.6423624753952026 }, { "id": "identity_96742", "image": "IconQA/images/96119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past 6 o'clock is it?" }, { "from": "gpt", "value": "The answer is 0 minutes" } ], "clip_score": 28.234375, "loss": 2.115138292312622 }, { "id": "identity_68559", "image": "ScienceQA/images/17522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total volume of pink particles in Solution A?" }, { "from": "gpt", "value": "The answer is 6 mL" } ], "clip_score": 37.40625, "loss": 2.704393148422241 }, { "id": "identity_116083", "image": "A-OKVQA/images/61175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What team name is written across the chest of the players' uniforms?" }, { "from": "gpt", "value": "The answer is Mets" } ], "clip_score": 25.9375, "loss": 2.6070168018341064 }, { "id": "identity_133814", "image": "Super-CLEVR/images/superCLEVR_new_001705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are present in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.625, "loss": 2.4370338916778564 }, { "id": "identity_198675", "image": "VQA-AS/images/7696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is next to the bush on the left side of the image?" }, { "from": "gpt", "value": "The answer is a squirrel" } ], "clip_score": 21.390625, "loss": 2.099670886993408 }, { "id": "identity_3387", "image": "UniGeo/images/2136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,等腰△ABC的底边BC长为6,面积是36,腰AC的垂直平分线EF分别交AC,AB边于E,F点.若点D为BC边的中点,点M为线段EF上一动点,则△CDM周长的最小值为()\nChoices:\n(A) 6\n(B) 10\n(C) 15\n(D) 16" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.1875, "loss": 2.5250751972198486 }, { "id": "identity_3382", "image": "FigureQA/images/46613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.4375, "loss": 3.8448307514190674 }, { "id": "identity_200129", "image": "PMC-VQA/images/PMC2858659_pone-0010298-g004_62727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many slices of the brain are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.5, "loss": 2.610799551010132 }, { "id": "identity_234978", "image": "VQA2.0/images/395396.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are on bikes and how many people are walking on the sidewalk?" }, { "from": "gpt", "value": "The answer is 4 on bikes, 7 walking" } ], "clip_score": 27.75, "loss": 2.223385810852051 }, { "id": "identity_268949", "image": "UniGeo/images/2555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, the perimeter of rhombus ABCD is 24cm. Diagonals AC and BD intersect at point O, E is the midpoint of AD. If OE is connected, what is the length of segment OE?\nChoices:\n(A) 3cm\n(B) 4cm\n(C) 2.5cm\n(D) 2cm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.53125, "loss": 2.3784220218658447 }, { "id": "identity_51716", "image": "MapQA/images/map_1817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of technicians in Florida greater than in New York?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 30.40625, "loss": 3.1045281887054443 }, { "id": "identity_262880", "image": "GeoQA+/images/56.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, a right-angled triangle board (∠A = ∠FDE = 90°, ∠F = 45°, ∠C = 60°, point D is on side AB) is positioned as shown. If the two slanting sides EF and BC are parallel to each other, what is the measure of ∠BDE?\nChoices:\n(A) 20°\n(B) 15°\n(C) 12°\n(D) 10°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.578125, "loss": 2.556981325149536 }, { "id": "identity_240029", "image": "PlotQA/images/136579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which country was the total grants received by the government from 1970 to 1973 maximum?" }, { "from": "gpt", "value": "The answer is Congo (Democratic)" } ], "clip_score": 32.15625, "loss": 2.8632946014404297 }, { "id": "identity_331689", "image": "UniGeo/images/2373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,小颖从山脚下的某点走了100米后到达山顶的某点,已知该点到山脚的垂直距离为60米,则sin某角的值为()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{4}{5}\n(C) \\frac{4}{3}\n(D) \\frac{3}{4}" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.953125, "loss": 2.4183456897735596 }, { "id": "identity_166399", "image": "IconQA/images/35231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If this shape were to be unfolded into a 2D net, how many individual rectangles would be present?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.484375, "loss": 2.0812671184539795 }, { "id": "identity_329415", "image": "ChartQA/images/7726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined value of Rep/Lean Rep and Dem/Lean Dem having responsibility?" }, { "from": "gpt", "value": "The answer is 117" } ], "clip_score": 27.765625, "loss": 2.6190602779388428 }, { "id": "identity_28053", "image": "DocVQA/images/jqmm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who presents the seminar?" }, { "from": "gpt", "value": "The answer is Dr. Bernard Pimstone" } ], "clip_score": 19.765625, "loss": 2.90539813041687 }, { "id": "identity_81060", "image": "GeoQA+/images/4963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and CD is a chord perpendicular to AB at C, and the length of AB is 10 units, what is the area of triangle ACD?" }, { "from": "gpt", "value": "The answer is 12.5 square units" } ], "clip_score": 36.875, "loss": 1.3938791751861572 }, { "id": "identity_289603", "image": "A-OKVQA/images/108500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the large truck stopping at this location?\nChoices:\n(A) fixing vehicle\n(B) traffic\n(C) truck broken\n(D) to advertise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.078125, "loss": 4.515687465667725 }, { "id": "identity_84404", "image": "Geometry3K/images/1128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ZCD?" }, { "from": "gpt", "value": "The answer is 22 degrees" } ], "clip_score": 32.625, "loss": 2.852388381958008 }, { "id": "identity_133281", "image": "A-OKVQA/images/360831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual plant specimens are displayed in the tubes?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.1875, "loss": 2.2430531978607178 }, { "id": "identity_34379", "image": "PMC-VQA/images/PMC4573681_Fig4_424242.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which implant shows cement extruding along the undersurface and side of the implant? \nChoices:\n(A) CPEG implant \n(B) CBACK implant \n(C) Both implants \n(D) Neither implant " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.484375, "loss": 3.7392375469207764 }, { "id": "identity_7571", "image": "VQA-RAD/images/synpic22791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is free air present in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.40625, "loss": 3.132896900177002 }, { "id": "identity_204742", "image": "VQA2.0/images/328663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are these people likely participating in?" }, { "from": "gpt", "value": "The answer is Skiing" } ], "clip_score": 26.375, "loss": 2.628765821456909 }, { "id": "identity_180356", "image": "IconQA/images/51358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If this clock gains 10 minutes every hour, what will be the time shown after 6 hours?" }, { "from": "gpt", "value": "The answer is 1:00" } ], "clip_score": 32.625, "loss": 1.9311575889587402 }, { "id": "identity_233117", "image": "IconQA/images/25671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more dots are needed to make 20 dots in total?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 29.09375, "loss": 2.550382137298584 }, { "id": "identity_114754", "image": "TextVQA/images/17f2ed0ad1841126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the album from which the song is playing?" }, { "from": "gpt", "value": "The answer is Era Extraña" } ], "clip_score": 17.59375, "loss": 2.603748321533203 }, { "id": "identity_163888", "image": "UniGeo/images/1296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle E is 40 degrees, what is the length of chord AC if the radius of the circle is 5 cm?" }, { "from": "gpt", "value": "The answer is AC = 2 * 5 / sin(40)" } ], "clip_score": 33.0625, "loss": 1.8008344173431396 }, { "id": "identity_83351", "image": "Geometry3K/images/1922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the triangle?" }, { "from": "gpt", "value": "The answer is 13.2 mm" } ], "clip_score": 35.71875, "loss": 2.1302013397216797 }, { "id": "identity_95486", "image": "IconQA/images/63660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there enough bones for each dog to have two?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.078125, "loss": 2.8457183837890625 }, { "id": "identity_142099", "image": "DVQA/images/bar_train_00058837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the highest bar and the lowest bar in the \"realm\" group?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.09375, "loss": 2.702791452407837 }, { "id": "identity_212371", "image": "A-OKVQA/images/98493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any trees without leaves in front of the school?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.96875, "loss": 3.4629993438720703 }, { "id": "identity_179969", "image": "IconQA/images/14149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are triangles?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.046875, "loss": 2.315723419189453 }, { "id": "identity_123973", "image": "VQA-AS/images/6329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of window covering is used on the window?" }, { "from": "gpt", "value": "The answer is Curtains" } ], "clip_score": 20.03125, "loss": 3.0297322273254395 }, { "id": "identity_204861", "image": "VizWiz/images/VizWiz_train_00005812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is predominantly featured on the shirt?" }, { "from": "gpt", "value": "The answer is Floral pattern" } ], "clip_score": 28.640625, "loss": 3.277790069580078 }, { "id": "identity_2928", "image": "GeoQA+/images/5651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在⊙O中,AB是⊙O的直径,AB=√{2}AC,∠ODC=12°,则∠DBC的度数为()\nChoices:\n(A) 80°\n(B) 102°\n(C) 120°\n(D) 135°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.0, "loss": 2.384586811065674 }, { "id": "identity_215223", "image": "A-OKVQA/images/147972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is parked under the tree to the left of the elephant?" }, { "from": "gpt", "value": "The answer is Auto rickshaw" } ], "clip_score": 29.390625, "loss": 2.90749192237854 }, { "id": "identity_161119", "image": "UniGeo/images/1306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 31.75, "loss": 2.4740779399871826 }, { "id": "identity_310218", "image": "UniGeo/images/2971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A set of triangles is arranged as shown, and the measure of angle 1 is 54 degrees greater than the measure of angle 2. What is the measure of angle 1?\n\nChoices:\n(A) 72\n(B) 70\n(C) 54\n(D) 18" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.03125, "loss": 3.0990424156188965 }, { "id": "identity_76224", "image": "AI2D/images/4198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process leads to the formation of sedimentary rock from sediments?" }, { "from": "gpt", "value": "The answer is Compaction and/or Cementation" } ], "clip_score": 22.828125, "loss": 1.9766191244125366 }, { "id": "identity_195564", "image": "TextVQA/images/8b7c688892e36f16.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of glove is the player closest to the camera wearing?" }, { "from": "gpt", "value": "The answer is Baseball glove" } ], "clip_score": 23.390625, "loss": 2.232126235961914 }, { "id": "identity_14074", "image": "GeoQA+/images/4923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在菱形ABCD中,M、N分别是BC和CD的中点,NP⊥AB于点P,连接MP.若∠DAB=40°,则∠MPB=()\nChoices:\n(A) 125°\n(B) 120°\n(C) 115°\n(D) 110°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.40625, "loss": 2.5001907348632812 }, { "id": "identity_51641", "image": "DVQA/images/bar_train_00027525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the 'trend' dataset more accurate in the 'lap' algorithm than in the 'uncle' algorithm?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.71875, "loss": 2.9980807304382324 }, { "id": "identity_61239", "image": "ChartQA/images/two_col_81092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the second most common cause of death according to the chart?" }, { "from": "gpt", "value": "The answer is Stabbing" } ], "clip_score": 24.359375, "loss": 2.615652322769165 }, { "id": "identity_316765", "image": "VQA2.0/images/506322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on top of this?" }, { "from": "gpt", "value": "The answer is flag" } ], "clip_score": 15.5625, "loss": 3.356253147125244 }, { "id": "identity_218837", "image": "A-OKVQA/images/554875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of equestrian event is depicted in this image?" }, { "from": "gpt", "value": "The answer is Cross-country or show jumping event" } ], "clip_score": 32.0625, "loss": 2.798574447631836 }, { "id": "identity_92412", "image": "IconQA/images/76847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two marbles are selected at random, what is the probability that both are black?" }, { "from": "gpt", "value": "The answer is 3/5 * 2/4 = 6/20 or 3/10" } ], "clip_score": 31.046875, "loss": 1.2087714672088623 }, { "id": "identity_210552", "image": "A-OKVQA/images/435579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the snowboard the person is using?" }, { "from": "gpt", "value": "The answer is Black and white" } ], "clip_score": 22.765625, "loss": 2.9839797019958496 }, { "id": "identity_195832", "image": "VQA-AS/images/17034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tree foliage?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 14.6640625, "loss": 3.4113247394561768 }, { "id": "identity_13844", "image": "TextVQA/images/03ef38f8a118d5a2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on the piece of paper tagged on the board?" }, { "from": "gpt", "value": "The answer is carola - ett begrepp bortom tid ochrum" } ], "clip_score": 31.078125, "loss": 2.869401693344116 }, { "id": "identity_195822", "image": "A-OKVQA/images/62745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the occasion mentioned on the cake?" }, { "from": "gpt", "value": "The answer is 30th Birthday and Graduation" } ], "clip_score": 25.96875, "loss": 1.7492592334747314 }, { "id": "identity_319383", "image": "Super-CLEVR/images/superCLEVR_new_000396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many metallic cars are the same size as the metal aeroplane?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 27.046875, "loss": 2.4949569702148438 }, { "id": "identity_148473", "image": "DocVQA/images/lgbv0228_39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many articles has Jon A. Vanderhoof submitted for publication?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.078125, "loss": 2.3861467838287354 }, { "id": "identity_307900", "image": "FigureQA/images/22661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.59375, "loss": 3.9677414894104004 }, { "id": "identity_63140", "image": "TQA/images/question_images/waves_8226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label C indicate in the diagram?" }, { "from": "gpt", "value": "The answer is Equilibrium position" } ], "clip_score": 27.96875, "loss": 3.4578070640563965 }, { "id": "identity_289117", "image": "DocVQA/images/sflm0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which section can John R. Whinnery be found?" }, { "from": "gpt", "value": "The answer is Engineering (31)" } ], "clip_score": 17.734375, "loss": 2.6578972339630127 }, { "id": "identity_156334", "image": "AI2D/images/4029.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the layer labeled between the Crust and the Outer Core?" }, { "from": "gpt", "value": "The answer is Mantle" } ], "clip_score": 35.6875, "loss": 2.722914695739746 }, { "id": "identity_119938", "image": "PMC-VQA/images/PMC5432899_Fig2_165183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaflets are present in one leaf of the plant?" }, { "from": "gpt", "value": "The answer is Nine" } ], "clip_score": 24.609375, "loss": 3.9863593578338623 }, { "id": "identity_79762", "image": "GeoQA+/images/3578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle 1 is 65 degrees, what is the measure of angle 2?" }, { "from": "gpt", "value": "The answer is 65 degrees" } ], "clip_score": 30.921875, "loss": 1.8811593055725098 }, { "id": "identity_28141", "image": "VizWiz/images/VizWiz_train_00014374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.046875, "loss": 3.81402587890625 }, { "id": "identity_330351", "image": "PlotQA/images/49215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the 1st bar from the bottom in this year represent?" }, { "from": "gpt", "value": "The answer is Belgium" } ], "clip_score": 25.234375, "loss": 3.790443181991577 }, { "id": "identity_318092", "image": "TabMWP/images/8011.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A pretzel stand owner kept track of the number of pretzels sold during the past 4 days. On which day did the stand sell the most?\nChoices:\n(A) Sunday\n(B) Monday\n(C) Tuesday\n(D) Wednesday" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.28125, "loss": 3.9965431690216064 }, { "id": "identity_273702", "image": "TextVQA/images/951eca6e9b73b017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which movie is this?" }, { "from": "gpt", "value": "The answer is tombraider" } ], "clip_score": 31.265625, "loss": 2.547739267349243 }, { "id": "identity_69118", "image": "AI2D/images/4438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common name of the fish associated with the Homocercal tail?" }, { "from": "gpt", "value": "The answer is Perch" } ], "clip_score": 30.5625, "loss": 3.768963098526001 }, { "id": "identity_685", "image": "DocVQA/images/hjcw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Page Number?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 14.859375, "loss": 2.9633967876434326 }, { "id": "identity_190531", "image": "A-OKVQA/images/3366.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the horse ridden by the individual?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.171875, "loss": 2.9674911499023438 }, { "id": "identity_293352", "image": "FigureQA/images/30592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Blue or Orange Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.046875, "loss": 4.642536163330078 }, { "id": "identity_193372", "image": "VQA-AS/images/5763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the rug in front of the fireplace?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 29.921875, "loss": 3.006200075149536 }, { "id": "identity_129080", "image": "A-OKVQA/images/296544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of dessert is presented as the centerpiece?" }, { "from": "gpt", "value": "The answer is A tiered cake" } ], "clip_score": 30.734375, "loss": 2.9871160984039307 }, { "id": "identity_312570", "image": "ScienceQA/images/476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Newport\n(B) Augusta\n(C) Providence\n(D) Carson City" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.671875, "loss": 4.2190327644348145 }, { "id": "identity_211877", "image": "A-OKVQA/images/155167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the park mentioned on the street sign?" }, { "from": "gpt", "value": "The answer is Central Park" } ], "clip_score": 25.546875, "loss": 4.278860092163086 }, { "id": "identity_327384", "image": "TabMWP/images/27818.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Observe the table. Then respond to the question. At a price of $165, is there an excess or a deficit?\nChoices:\n(A) deficit\n(B) excess" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.828125, "loss": 3.72827410697937 }, { "id": "identity_319815", "image": "GeoQA+/images/1665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图, a∥b, ∠1=120°, 则∠2等于多少?\n选项:\n(A) 30°\n(B) 90°\n(C) 60°\n(D) 50°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.890625, "loss": 2.6883296966552734 }, { "id": "identity_207848", "image": "A-OKVQA/images/498137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of stand is the motorcycle in the foreground mounted on?" }, { "from": "gpt", "value": "The answer is A center stand" } ], "clip_score": 31.53125, "loss": 3.4078986644744873 }, { "id": "identity_148201", "image": "DVQA/images/bar_train_00089131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the store represented by the color green?" }, { "from": "gpt", "value": "The answer is 10 units" } ], "clip_score": 27.421875, "loss": 2.1930952072143555 }, { "id": "identity_283407", "image": "VQA2.0/images/208731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are any umbrellas currently open?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.375, "loss": 3.209326982498169 }, { "id": "identity_254276", "image": "VQA2.0/images/33943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are on the counter near the sink, and what types of items are they?" }, { "from": "gpt", "value": "The answer is Six items: cup, bottle, container, sponge, paper towel, blue item" } ], "clip_score": 14.765625, "loss": 2.910053253173828 }, { "id": "identity_157977", "image": "ScienceQA/images/5596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the electron configuration of each fluorine atom before bonding as shown in the image?" }, { "from": "gpt", "value": "The answer is 1s2 2s2 2p5" } ], "clip_score": 17.875, "loss": 0.967025876045227 }, { "id": "identity_75070", "image": "TQA/images/abc_question_images/solar_system_11433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What celestial body is represented by the label 'Y'?" }, { "from": "gpt", "value": "The answer is Pluto" } ], "clip_score": 25.71875, "loss": 2.515303373336792 }, { "id": "identity_179444", "image": "TabMWP/images/30456.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Adriana buys two hair ties, how much change will she receive from $2.00?" }, { "from": "gpt", "value": "The answer is $0.56" } ], "clip_score": 28.734375, "loss": 1.9028382301330566 }, { "id": "identity_202737", "image": "PMC-VQA/images/PMC6294250_Fig1_411683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the structures indicated by the arrow?" }, { "from": "gpt", "value": "The answer is Circular or round" } ], "clip_score": 17.046875, "loss": 3.6058526039123535 }, { "id": "identity_264184", "image": "TextVQA/images/c24753768bdd7f33.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which number is the hour hand closest?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 20.296875, "loss": 2.224134922027588 }, { "id": "identity_328958", "image": "GeoQA+/images/4945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在ABCD中,AD>AB,以点A为圆心,AB为半径画弧与AD交于点F,然后分别以B,F为圆心,大于0.5×BF为半径画弧交于点G,连接AG交BC于点E,若BF=6,AB=4,则AE的长度是多少?\nChoices:\n(A) √{7}\n(B) 2√{7}\n(C) 5\n(D) 10" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.140625, "loss": 2.196943759918213 }, { "id": "identity_279650", "image": "TextVQA/images/6ae2ea57c4e3c335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the current time?" }, { "from": "gpt", "value": "The answer is 10:02" } ], "clip_score": 26.09375, "loss": 1.4631929397583008 }, { "id": "identity_118692", "image": "VQA2.0/images/319201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the cake's frosting?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 25.359375, "loss": 3.7882168292999268 }, { "id": "identity_235689", "image": "TQA/images/question_images/muscle_fiber_7086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mitochondria are there in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.34375, "loss": 2.4175302982330322 }, { "id": "identity_268845", "image": "ChartQA/images/two_col_3207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the decrease in the hotel attendance rate from FY 2016 to FY 2021?" }, { "from": "gpt", "value": "The answer is 1514" } ], "clip_score": 25.421875, "loss": 2.7538578510284424 }, { "id": "identity_162897", "image": "Geometry3K/images/1699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle formed by the tangent and the chord at point 7?" }, { "from": "gpt", "value": "The answer is 98 degrees" } ], "clip_score": 34.03125, "loss": 2.4104814529418945 }, { "id": "identity_201917", "image": "PMC-VQA/images/PMC7482117_f2_251602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the arrow pointing to in image A?" }, { "from": "gpt", "value": "The answer is A brain lesion or tumor" } ], "clip_score": 34.1875, "loss": 2.7008519172668457 }, { "id": "identity_16703", "image": "DocVQA/images/rgfl0225_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which is the Study Site?" }, { "from": "gpt", "value": "The answer is Covance - Dallas" } ], "clip_score": 17.390625, "loss": 4.6140546798706055 }, { "id": "identity_49257", "image": "PlotQA/images/76429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of unemployed female labor force of Yemen, Rep. over the six years shown?" }, { "from": "gpt", "value": "The answer is Approximately 35.83%" } ], "clip_score": 38.21875, "loss": 2.0065290927886963 }, { "id": "identity_303816", "image": "GeoQA+/images/1874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A right-angled triangle is placed as shown in the image, with the hypotenuses of the two triangles parallel to each other, and each right-angle vertex of one triangle lying on the hypotenuse of the other triangle. What is the measure of angle 1?\n\nChoices:\n(A) 90°\n(B) 75°\n(C) 65°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.25, "loss": 3.454603910446167 }, { "id": "identity_27621", "image": "IconQA/images/4637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Robert's mom is making eggs in the morning. The clock on the wall shows the time. What time is it?\nChoices:\n(A) 8:00 P.M.\n(B) 8:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.578125, "loss": 3.6630196571350098 }, { "id": "identity_238602", "image": "ChartQA/images/two_col_20554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weighted average of all the responses, assigning weights of 5 to \"Totally agree\", 4 to \"Somewhat agree\", 3 to \"Neither agree nor disagree\", 2 to \"Somewhat disagree\", and 1 to \"Totally disagree\"?" }, { "from": "gpt", "value": "The answer is 3.58" } ], "clip_score": 24.265625, "loss": 2.0072391033172607 }, { "id": "identity_249780", "image": "CLEVR-Math/images/CLEVR_train_054470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you start with 1 small green metal sphere, add 3 more, and then remove 2 small green metal spheres, how many small green metal spheres are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.359375, "loss": 2.225785255432129 }, { "id": "identity_210038", "image": "PMC-VQA/images/PMC8399464_pharmaceutics-13-01210-f001_46358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of microscopy technique was used to capture this image?" }, { "from": "gpt", "value": "The answer is Scanning Electron Microscopy" } ], "clip_score": 31.8125, "loss": 1.731701135635376 }, { "id": "identity_167534", "image": "IconQA/images/66217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one triangle is removed, how many triangles will remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 27.796875, "loss": 2.392071485519409 }, { "id": "identity_152331", "image": "ScienceQA/images/16201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the poles in Pair 1?" }, { "from": "gpt", "value": "The answer is South to North" } ], "clip_score": 22.9375, "loss": 3.3944108486175537 }, { "id": "identity_262620", "image": "ScienceQA/images/14763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism among the following contains matter that was previously part of the phytoplankton?\nChoices:\n(A) orca\n(B) zooplankton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 3.6771063804626465 }, { "id": "identity_180194", "image": "TabMWP/images/23400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a family of two seniors and two children visit the Southern Nevada Zoo, how much will they spend on tickets?" }, { "from": "gpt", "value": "The answer is $24" } ], "clip_score": 36.96875, "loss": 2.075688123703003 }, { "id": "identity_200740", "image": "PMC-VQA/images/PMC4821972_fig4_18602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the lesion appear to be elevated or flat?" }, { "from": "gpt", "value": "The answer is The lesion appears flat" } ], "clip_score": 24.046875, "loss": 3.5001320838928223 }, { "id": "identity_207923", "image": "PMC-VQA/images/PMC7565898_fig4_276346.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a green color present in them?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.421875, "loss": 2.1855008602142334 }, { "id": "identity_90723", "image": "GeoQA+/images/2041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, if OC is a tangent to the circle at point A, what is the length of OC given that the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is √50 or approximately 7.07 units" } ], "clip_score": 29.296875, "loss": 1.7399119138717651 }, { "id": "identity_79911", "image": "GeoQA+/images/5329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is 5 and chord CD is 6, what is the length of the radius OC?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 35.125, "loss": 2.466820478439331 }, { "id": "identity_176332", "image": "IconQA/images/38349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how many units has the letter 'Q' been shifted vertically?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 27.859375, "loss": 2.497400999069214 }, { "id": "identity_34634", "image": "CLEVR-Math/images/CLEVR_train_014033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all yellow blocks. Subtract all cyan balls. How many blocks are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.4375, "loss": 2.310112714767456 }, { "id": "identity_276630", "image": "ScienceQA/images/10768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area on the map?\nChoices:\n(A) Solomon Islands\n(B) Tonga\n(C) the Marshall Islands\n(D) Samoa" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.34375, "loss": 3.7953784465789795 }, { "id": "identity_181420", "image": "CLEVR-Math/images/CLEVR_train_062335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither spheres nor cylinders?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.609375, "loss": 2.478726625442505 }, { "id": "identity_208205", "image": "VQA2.0/images/351262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the lights on top of the police motorbike?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 30.953125, "loss": 3.4934749603271484 }, { "id": "identity_110651", "image": "PMC-VQA/images/PMC7925774_Fig1_382478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the structure indicated by the white arrow?" }, { "from": "gpt", "value": "The answer is Aorta" } ], "clip_score": 25.78125, "loss": 3.068654775619507 }, { "id": "identity_148665", "image": "DocVQA/images/kjgf0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the article on \"Clinical Significance of the Various Carbohydrates\"?" }, { "from": "gpt", "value": "The answer is Colonel Plough" } ], "clip_score": 21.78125, "loss": 5.1822991371154785 }, { "id": "identity_226725", "image": "PMC-VQA/images/PMC3744869_F5_225086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of dementia shows light blue atrophy in the image, and what are the specific brain regions affected by this atrophy? Additionally, explain how these regions are involved in Theory of Mind (ToM) and language processing" }, { "from": "gpt", "value": "The answer is PNFA, bilateral insular gyrus, involved in ToM and language processing" } ], "clip_score": 32.71875, "loss": 3.192939519882202 }, { "id": "identity_72785", "image": "TQA/images/question_images/parts_neuron_7227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled as \"Node of Ranvier\"?" }, { "from": "gpt", "value": "The answer is Node of Ranvier" } ], "clip_score": 20.359375, "loss": 1.691834568977356 }, { "id": "identity_2714", "image": "GeoQA+/images/338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,▱ABCD中,∠B比∠A大40°,则∠D的度数为()\nChoices:\n(A) 60°\n(B) 70°\n(C) 100°\n(D) 110°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.234375, "loss": 2.789290428161621 }, { "id": "identity_285632", "image": "VQA-AS/images/6215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is located on the stool?" }, { "from": "gpt", "value": "The answer is plant" } ], "clip_score": 18.984375, "loss": 3.234632968902588 }, { "id": "identity_71533", "image": "ScienceQA/images/4853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Would the rock shown in the image be suitable for outdoor construction?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.96875, "loss": 3.0836868286132812 }, { "id": "identity_268009", "image": "TabMWP/images/8112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each table contains 2 plates, how many plates are there in total on 3 tables?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 36.1875, "loss": 2.081913948059082 }, { "id": "identity_150390", "image": "TQA/images/question_images/human_system_nervous_2854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure located between the frontal lobe and the parietal lobe?" }, { "from": "gpt", "value": "The answer is central sulcus" } ], "clip_score": 34.9375, "loss": 2.529693365097046 }, { "id": "identity_169143", "image": "TabMWP/images/1889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest score obtained by any student?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 29.34375, "loss": 2.5109164714813232 }, { "id": "identity_324507", "image": "IconQA/images/21292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a turtle. What is the second image?\nChoices:\n(A) sub\n(B) fish\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.75, "loss": 3.9215214252471924 }, { "id": "identity_185631", "image": "PMC-VQA/images/PMC3844447_F11_246859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the abnormality indicated by the arrowheads?" }, { "from": "gpt", "value": "The answer is Provide an estimation based on the image scale" } ], "clip_score": 21.65625, "loss": 3.4598042964935303 }, { "id": "identity_288290", "image": "DocVQA/images/hnfb0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the President & Scientific Director of the National Vitamin Foundation?" }, { "from": "gpt", "value": "The answer is robert s. goodhart, m.d." } ], "clip_score": 39.03125, "loss": 1.4584733247756958 }, { "id": "identity_13322", "image": "Super-CLEVR/images/superCLEVR_new_001933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The red thing that is the same material as the airliner is what shape?" }, { "from": "gpt", "value": "The answer is tandem bike" } ], "clip_score": 25.25, "loss": 3.9940073490142822 }, { "id": "identity_188704", "image": "PMC-VQA/images/PMC6952038_FIG1_97204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the numerical value for the window width (WW) in the image?" }, { "from": "gpt", "value": "The answer is 112" } ], "clip_score": 16.375, "loss": 2.44938325881958 }, { "id": "identity_197982", "image": "VQA-AS/images/15124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the sofa?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.78125, "loss": 3.446634292602539 }, { "id": "identity_305044", "image": "Geometry3K/images/1166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If P R \\parallel K L, K N = 9, L N = 16, P M = 2 times K P, find K P.\nChoices:\n(A) 5\n(B) 7\n(C) 8\n(D) 14" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.125, "loss": 2.5951786041259766 }, { "id": "identity_253409", "image": "PMC-VQA/images/PMC8102829_F4_435054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structures shown in contact in the image and describe their roles in the central nervous system" }, { "from": "gpt", "value": "The answer is Blood vessels supply nutrients; synapses transmit signals between neurons" } ], "clip_score": 28.1875, "loss": 3.066011667251587 }, { "id": "identity_34926", "image": "A-OKVQA/images/125275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The fruit shown contains a high level of what?\nChoices:\n(A) magnesium\n(B) potassium\n(C) vitamin\n(D) vitamin b" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.890625, "loss": 4.1022772789001465 }, { "id": "identity_154235", "image": "ScienceQA/images/3759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the strait that separates the highlighted continent from Africa?" }, { "from": "gpt", "value": "The answer is Strait of Gibraltar" } ], "clip_score": 27.59375, "loss": 1.428354024887085 }, { "id": "identity_301989", "image": "ScienceQA/images/16271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this experiment, which were included in an experimental group?\nChoices:\n(A) the plots with fires\n(B) the plots with no fires" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.203125, "loss": 4.206883430480957 }, { "id": "identity_229536", "image": "TabMWP/images/32224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average rate of change in waiting time from July to November?" }, { "from": "gpt", "value": "The answer is 5 minutes per month" } ], "clip_score": 29.640625, "loss": 1.916555643081665 }, { "id": "identity_111042", "image": "A-OKVQA/images/512838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the flag at the top right corner of the image?" }, { "from": "gpt", "value": "The answer is Rectangular" } ], "clip_score": 15.234375, "loss": 3.3140957355499268 }, { "id": "identity_53223", "image": "FigureQA/images/96786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Cornflower surpass Dark Salmon at any point on the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.75, "loss": 3.2111282348632812 }, { "id": "identity_261265", "image": "PlotQA/images/9167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of people living with HIV who are receiving antiretroviral therapy in Togo from 2007 to 2014?" }, { "from": "gpt", "value": "The answer is 19%" } ], "clip_score": 39.8125, "loss": 2.0090789794921875 }, { "id": "identity_57744", "image": "MapQA/images/map_9117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Texas?" }, { "from": "gpt", "value": "The answer is 6680445-6686251" } ], "clip_score": 27.03125, "loss": 2.5228445529937744 }, { "id": "identity_69818", "image": "ScienceQA/images/13448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state flower of the highlighted state?" }, { "from": "gpt", "value": "The answer is Sagebrush" } ], "clip_score": 30.421875, "loss": 1.6397504806518555 }, { "id": "identity_105804", "image": "VQA2.0/images/417204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Dusk or dawn" } ], "clip_score": 13.125, "loss": 3.1953177452087402 }, { "id": "identity_260456", "image": "Super-CLEVR/images/superCLEVR_new_001685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles (including aeroplanes) that are to the right of the tiny metal car that is right of the tiny gray rubber aeroplane?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.0, "loss": 2.421867609024048 }, { "id": "identity_280890", "image": "A-OKVQA/images/268306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the large glass object on the bike that serves as a light source?\nChoices:\n(A) headlight\n(B) turn light\n(C) break light\n(D) night light" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.21875, "loss": 4.331297397613525 }, { "id": "identity_132929", "image": "VQA2.0/images/95409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any children present in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is at least one child" } ], "clip_score": 13.25, "loss": 2.7739431858062744 }, { "id": "identity_280825", "image": "UniGeo/images/3098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If a right-angled triangle and a ruler are placed as shown in the diagram, and ∠α is 43°, what is the degree measure of ∠β?\nChoices:\n(A) 47°\n(B) 43°\n(C) 53°\n(D) 137°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.34375, "loss": 2.4151010513305664 }, { "id": "identity_215109", "image": "PMC-VQA/images/PMC3542371_pone-0053580-g004_178529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the green lines on the left or right side of the image?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 21.5625, "loss": 4.444559097290039 }, { "id": "identity_83791", "image": "UniGeo/images/989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that OA is perpendicular to BC and ∠AOB = 50°, find the measure of ∠ACB" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 31.03125, "loss": 1.6008551120758057 }, { "id": "identity_262804", "image": "AI2D/images/14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If sea weeds are washed out due to a Tsunami, what would be the consequence based on the given food chain?\nChoices:\n(A) Limpets would be affected\n(B) Prawns would face extinction\n(C) Crabs population would increase\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.140625, "loss": 3.7496867179870605 }, { "id": "identity_257693", "image": "CLEVR-Math/images/CLEVR_train_008853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After removing all brown cylinders, how many blocks are left in total, and what is the ratio of purple blocks to the total number of blocks?" }, { "from": "gpt", "value": "The answer is 4 blocks, ratio 1:2" } ], "clip_score": 26.828125, "loss": 2.88802170753479 }, { "id": "identity_278634", "image": "AI2D/images/3374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the structure shown is located at the bottom? \nChoices:\n(A) none of the above\n(B) sepals\n(C) pedicel\n(D) petal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.046875, "loss": 3.972400665283203 }, { "id": "identity_333019", "image": "A-OKVQA/images/329623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How will this be cooked?\nChoices:\n(A) barbecue\n(B) fire\n(C) oven\n(D) microwave" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.765625, "loss": 3.8478565216064453 }, { "id": "identity_131780", "image": "TextVQA/images/7d15b68bbae4c880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the stop sign?" }, { "from": "gpt", "value": "The answer is Faded and damaged" } ], "clip_score": 25.53125, "loss": 3.034198522567749 }, { "id": "identity_61893", "image": "DocVQA/images/mtkc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the street address of the association?" }, { "from": "gpt", "value": "The answer is 1740 BROADWAY" } ], "clip_score": 30.90625, "loss": 1.8697596788406372 }, { "id": "identity_216064", "image": "A-OKVQA/images/365456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of court surface are the players on?" }, { "from": "gpt", "value": "The answer is Hard court" } ], "clip_score": 23.84375, "loss": 2.8182222843170166 }, { "id": "identity_25673", "image": "Geometry3K/images/1179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of the parallelogram.\nChoices:\n(A) 72\n(B) 74\n(C) 76\n(D) 78" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.03125, "loss": 3.0019335746765137 }, { "id": "identity_334876", "image": "IconQA/images/40930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 59" } ], "clip_score": 24.921875, "loss": 2.3276381492614746 }, { "id": "identity_131519", "image": "VQA2.0/images/293589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the horse the man is riding?" }, { "from": "gpt", "value": "The answer is Black with white markings" } ], "clip_score": 25.28125, "loss": 3.109316349029541 }, { "id": "identity_319768", "image": "VQA-AS/images/10957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are in the sky?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.5, "loss": 2.588977575302124 }, { "id": "identity_32610", "image": "PMC-VQA/images/PMC6322781_pone.0209674.g008_419943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which sections of the abdomen are the images taken? \nChoices:\n(A) Coronal, sagittal \n(B) Sagittal, transaxial \n(C) Transaxial, sagittal \n(D) Transaxial, coronal. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 4.223076820373535 }, { "id": "identity_224266", "image": "ChartQA/images/two_col_83167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much percentage does the combined revenue share of EMEIA** and Americas exceed the revenue share of the Asia Pacific region?" }, { "from": "gpt", "value": "The answer is 19.48%" } ], "clip_score": 29.9375, "loss": 2.0164034366607666 }, { "id": "identity_257843", "image": "AI2D/images/374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the process of how magma from deep below the surface of the earth reaches the surface and becomes lava, including the role of the earth's crust and inner layers" }, { "from": "gpt", "value": "The answer is Magma rises, erupts through a volcano, becomes lava" } ], "clip_score": 34.0625, "loss": 2.313189744949341 }, { "id": "identity_220438", "image": "PMC-VQA/images/PMC4731441_Fig1_467175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the artery labeled with the arrowhead in the image?" }, { "from": "gpt", "value": "The answer is Left Circumflex artery (LCx)" } ], "clip_score": 29.796875, "loss": 2.161248207092285 }, { "id": "identity_111290", "image": "TextVQA/images/75de0ddb3b8e6e8f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of microphones visible on the desks?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 16.390625, "loss": 2.0512335300445557 }, { "id": "identity_277393", "image": "DVQA/images/bar_train_00014492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who preferred the object loss in all categories combined?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.171875, "loss": 2.431976318359375 }, { "id": "identity_14454", "image": "IconQA/images/68215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Eric is shopping this morning. The clock shows the time. What time is it?\nChoices:\n(A) 11:00 A.M.\n(B) 11:00 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.15625, "loss": 3.686304807662964 }, { "id": "identity_17695", "image": "FigureQA/images/70080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Web Purple have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.796875, "loss": 3.7583706378936768 }, { "id": "identity_81751", "image": "UniGeo/images/1225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If PA and PB are tangents to circle O at points A and B, and angle ACB is 65 degrees, what is the measure of angle AOB?" }, { "from": "gpt", "value": "The answer is 130 degrees" } ], "clip_score": 32.46875, "loss": 1.8658782243728638 }, { "id": "identity_220858", "image": "VQA2.0/images/99952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are occupied by people in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 22.40625, "loss": 2.535703182220459 }, { "id": "identity_252444", "image": "TQA/images/question_images/solar_system_6300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Arrange the planets in order from closest to furthest from the Sun, then identify the third planet in this sequence" }, { "from": "gpt", "value": "The answer is Earth" } ], "clip_score": 27.96875, "loss": 3.2671217918395996 }, { "id": "identity_223612", "image": "PMC-VQA/images/PMC2410174_fig1_23923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the abnormality observed in the brain in this image?" }, { "from": "gpt", "value": "The answer is Irregular and angular" } ], "clip_score": 29.296875, "loss": 4.608003616333008 }, { "id": "identity_208089", "image": "A-OKVQA/images/463665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the water body in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 18.65625, "loss": 2.9660844802856445 }, { "id": "identity_155059", "image": "AI2D/images/1495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is represented by the red arrows coming from the tree?" }, { "from": "gpt", "value": "The answer is Transpiration" } ], "clip_score": 26.09375, "loss": 2.704930543899536 }, { "id": "identity_70779", "image": "TQA/images/question_images/human_system_eye_6129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the number 3 indicate in this diagram of the eye?" }, { "from": "gpt", "value": "The answer is The ciliary body" } ], "clip_score": 34.78125, "loss": 2.335833787918091 }, { "id": "identity_335218", "image": "VQA-AS/images/8988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the animal standing on top of something near a window?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.125, "loss": 3.4522132873535156 }, { "id": "identity_330488", "image": "DocVQA/images/sykk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is mentioned related to Fall 72 and Fall 73?" }, { "from": "gpt", "value": "The answer is CULTIVATOR" } ], "clip_score": 19.3125, "loss": 3.6967806816101074 }, { "id": "identity_232590", "image": "TabMWP/images/22366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of masks made by artists who made more than 3 masks?" }, { "from": "gpt", "value": "The answer is 219 masks" } ], "clip_score": 35.5, "loss": 2.267078399658203 }, { "id": "identity_273464", "image": "FigureQA/images/79578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the area under the curve with the minimum value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.703125, "loss": 3.8778491020202637 }, { "id": "identity_332864", "image": "VQA2.0/images/365631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are in the image?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 11.234375, "loss": 1.9948883056640625 }, { "id": "identity_167544", "image": "TabMWP/images/8647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of actors per movie?" }, { "from": "gpt", "value": "The answer is No mode" } ], "clip_score": 27.5625, "loss": 4.671874046325684 }, { "id": "identity_96040", "image": "TabMWP/images/37498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phone calls did the companies make in total?" }, { "from": "gpt", "value": "The answer is 3054" } ], "clip_score": 30.875, "loss": 2.5381877422332764 }, { "id": "identity_252134", "image": "ScienceQA/images/8757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the ratio of the average kinetic energy of the particles in sample A to the average kinetic energy of the particles in sample B" }, { "from": "gpt", "value": "The answer is 1.467" } ], "clip_score": 27.578125, "loss": 1.9314022064208984 }, { "id": "identity_80999", "image": "GeoQA+/images/3176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle formed by the 45-degree triangle and the straight edge?" }, { "from": "gpt", "value": "The answer is 135 degrees" } ], "clip_score": 30.578125, "loss": 1.9121657609939575 }, { "id": "identity_209440", "image": "PMC-VQA/images/PMC4156216_f4-ol-08-04-1713_318275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the brain shows a dark area suggesting a possible issue?" }, { "from": "gpt", "value": "The answer is Right side of the brain" } ], "clip_score": 27.90625, "loss": 2.469043254852295 }, { "id": "identity_267774", "image": "Super-CLEVR/images/superCLEVR_new_001151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the red shiny motorbikes in front of the large green minivan equal the number of tiny red metal dirtbikes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.828125, "loss": 4.705906867980957 }, { "id": "identity_151834", "image": "ScienceQA/images/20128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is highlighted in dark green on the map?" }, { "from": "gpt", "value": "The answer is Belize" } ], "clip_score": 30.859375, "loss": 3.9679744243621826 }, { "id": "identity_88171", "image": "UniGeo/images/324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the circle with center O, if chord BC is parallel to radius OA and angle C is 20 degrees, what is the measure of angle OMB?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.09375, "loss": 1.970444917678833 }, { "id": "identity_278688", "image": "VizWiz/images/VizWiz_train_00019434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are you able to read and input the code here? Thank you." }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.515625, "loss": 2.992581367492676 }, { "id": "identity_168056", "image": "IconQA/images/90695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past 4 o'clock is it?" }, { "from": "gpt", "value": "The answer is 0 minutes" } ], "clip_score": 27.484375, "loss": 2.0277106761932373 }, { "id": "identity_64552", "image": "AI2D/images/619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are there in the bean plant life cycle according to the diagram?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 40.6875, "loss": 2.376115560531616 }, { "id": "identity_251878", "image": "MapQA/images/map_5147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined range of the number of inquiries for the states with the highest values in the Northeast and the Midwest?" }, { "from": "gpt", "value": "The answer is 370-377" } ], "clip_score": 34.1875, "loss": 2.012437343597412 }, { "id": "identity_40168", "image": "FigureQA/images/13186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have equal segments for each color?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.109375, "loss": 3.3381035327911377 }, { "id": "identity_113314", "image": "PMC-VQA/images/PMC6226003_F2_394343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different proteins or structures are labeled in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.65625, "loss": 4.283274173736572 }, { "id": "identity_5645", "image": "Geometry3K/images/478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find A X if E X = 24 and D E = 7.\nChoices:\n(A) 1\n(B) 7\n(C) 24\n(D) 25" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 2.493525266647339 }, { "id": "identity_66962", "image": "ScienceQA/images/10862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the east of the highlighted states?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 28.578125, "loss": 2.9513485431671143 }, { "id": "identity_210181", "image": "PMC-VQA/images/PMC5608410_pone.0185239.g001_218317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter is marked in the top right corner of the image?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.828125, "loss": 2.4874227046966553 }, { "id": "identity_169787", "image": "IconQA/images/67003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this shape have any parallel edges?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.0, "loss": 3.32586407661438 }, { "id": "identity_70448", "image": "AI2D/images/3155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the outer edge of the leaf, as labeled \"Margin\"?" }, { "from": "gpt", "value": "The answer is Margin" } ], "clip_score": 28.234375, "loss": 2.592184066772461 }, { "id": "identity_40106", "image": "DocVQA/images/jhyc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What department is mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Department of Preventive Medicine" } ], "clip_score": 18.296875, "loss": 2.902810573577881 }, { "id": "identity_115799", "image": "A-OKVQA/images/531146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of road is the bus driving on?" }, { "from": "gpt", "value": "The answer is A two-lane road" } ], "clip_score": 28.625, "loss": 2.465599298477173 }, { "id": "identity_79678", "image": "GeoQA+/images/789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB=AC=4 and ∠BAC=120°, what is the measure of ∠BOC?" }, { "from": "gpt", "value": "The answer is 240°" } ], "clip_score": 29.9375, "loss": 1.5688261985778809 }, { "id": "identity_61508", "image": "DocVQA/images/nznm0227_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the topic of discussion in the Shoreham Blue Room on Wednesday evening?" }, { "from": "gpt", "value": "The answer is APS-disc Phytoal. pl.&Turf" } ], "clip_score": 16.1875, "loss": 5.620657920837402 }, { "id": "identity_27778", "image": "A-OKVQA/images/412908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of shop is this?\nChoices:\n(A) body\n(B) gift\n(C) hair\n(D) auto" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.578125, "loss": 4.108973026275635 }, { "id": "identity_229738", "image": "DocVQA/images/trkm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full title and position of the person who wrote this letter?" }, { "from": "gpt", "value": "The answer is Associate Dean for Research and Graduate Study" } ], "clip_score": 22.171875, "loss": 2.5900228023529053 }, { "id": "identity_85268", "image": "GeoQA+/images/1654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point M is the midpoint of chord AB, what is the length of OM?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.8125, "loss": 2.9315106868743896 }, { "id": "identity_168828", "image": "TabMWP/images/2905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of votes for cookie dough ice cream?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.359375, "loss": 2.5577080249786377 }, { "id": "identity_239856", "image": "TabMWP/images/13635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median age of the German club members?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 22.8125, "loss": 2.2197437286376953 }, { "id": "identity_336919", "image": "PMC-VQA/images/PMC8099429_fig5s3_434013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition in which these images were taken?\nChoices:\n(A) Fixed conditions\n(B) Unfixed conditions\n(C) GTA conditions\n(D) Micron conditions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.0, "loss": 4.426608085632324 }, { "id": "identity_41421", "image": "FigureQA/images/62166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the y-axis label have a consistent increment?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.96875, "loss": 3.2027969360351562 }, { "id": "identity_103910", "image": "VizWiz/images/VizWiz_train_00009179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the coffee pod shown in the image indicate any specific certifications?" }, { "from": "gpt", "value": "The answer is Yes, Rainforest Alliance Certified" } ], "clip_score": 28.25, "loss": 2.282270669937134 }, { "id": "identity_325316", "image": "PlotQA/images/15936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average mineral rent in this country per year?" }, { "from": "gpt", "value": "The answer is 0.09321088829785676" } ], "clip_score": 25.8125, "loss": 2.4679148197174072 }, { "id": "identity_246423", "image": "TabMWP/images/3318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which game has the longest duration, and what is its duration in minutes?" }, { "from": "gpt", "value": "The answer is All games have the same duration of 90 minutes" } ], "clip_score": 22.703125, "loss": 2.7159221172332764 }, { "id": "identity_219804", "image": "TextVQA/images/f8e969264fc43aa4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the umpire's attire?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.140625, "loss": 3.5989861488342285 }, { "id": "identity_116508", "image": "VQA-AS/images/5907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are visible in the sky?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 15.484375, "loss": 2.2408816814422607 }, { "id": "identity_5692", "image": "GeoQA+/images/562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O中,弦BC与半径OA相交于点D,连接AB,OC.若∠A=60°,∠ADC=85°,则∠C的度数是()\nChoices:\n(A) 35°\n(B) 27.5°\n(C) 30°\n(D) 25°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.625, "loss": 2.7055935859680176 }, { "id": "identity_150936", "image": "ScienceQA/images/18898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.609375, "loss": 2.337921142578125 }, { "id": "identity_241869", "image": "IconQA/images/4404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cubes tall would two flowers be if stacked on top of each other?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.703125, "loss": 2.232686996459961 }, { "id": "identity_291951", "image": "PMC-VQA/images/PMC4353421_fig1_365270.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion detected on the computed tomography scan located? \nChoices:\n(A) Upper lobe bronchus \n(B) Middle lobe bronchus \n(C) Lower lobe bronchus \n(D) No description given" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.328125, "loss": 4.0058112144470215 }, { "id": "identity_313040", "image": "TextVQA/images/0dade82f8d16aeeb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this sign telling you to do?" }, { "from": "gpt", "value": "The answer is stop" } ], "clip_score": 14.7578125, "loss": 2.760491132736206 }, { "id": "identity_205634", "image": "VQA2.0/images/563299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the graffiti in the background?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 27.140625, "loss": 4.815102577209473 }, { "id": "identity_76750", "image": "AI2D/images/807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in the food web?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.703125, "loss": 2.7161595821380615 }, { "id": "identity_61951", "image": "DocVQA/images/jybx0223_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Book Value Per Share (Rs) in the year 2004-05?" }, { "from": "gpt", "value": "The answer is 31.20" } ], "clip_score": 22.25, "loss": 2.10552716255188 }, { "id": "identity_206301", "image": "VQA2.0/images/515518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electronic device is on the table?" }, { "from": "gpt", "value": "The answer is Smartphone" } ], "clip_score": 20.453125, "loss": 2.485069751739502 }, { "id": "identity_184247", "image": "PMC-VQA/images/PMC7244456_Fig1_176475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the scaphoid bone completely separated in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.0625, "loss": 3.0137436389923096 }, { "id": "identity_59779", "image": "DocVQA/images/yshb0228_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the least expenditure?" }, { "from": "gpt", "value": "The answer is Libraries" } ], "clip_score": 23.0, "loss": 2.6505203247070312 }, { "id": "identity_434", "image": "TQA/images/question_images/optics_lense_types_9158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the widest image shown in the diagram?\nChoices:\n(A) negative meniscus\n(B) biconvex\n(C) plano-convex\n(D) positive meniscus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.8125, "loss": 4.050566673278809 }, { "id": "identity_296396", "image": "AI2D/images/961.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term used to describe a shape with a heart-shaped outline?\nChoices:\n(A) oblique\n(B) cordate\n(C) hastate\n(D) rounded" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.9375, "loss": 3.813217878341675 }, { "id": "identity_44436", "image": "FigureQA/images/77182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Medium Seafoam at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 34" } ], "clip_score": 27.453125, "loss": 2.882507801055908 }, { "id": "identity_311999", "image": "A-OKVQA/images/581071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is sold inside this place?\nChoices:\n(A) tires\n(B) groceries food\n(C) jewels\n(D) paper hats" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.078125, "loss": 4.129303932189941 }, { "id": "identity_161971", "image": "GeoQA+/images/1971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,BD、CE是△ABC的中线,BD与CE相交于点O,点F、G分别是BO、CO的中点,连接AO.若AO=6cm,BC=8cm,求线段BD的长度是多少厘米?" }, { "from": "gpt", "value": "The answer is 8厘米" } ], "clip_score": 27.859375, "loss": 2.229868173599243 }, { "id": "identity_86778", "image": "Geometry3K/images/1820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of segment PT" }, { "from": "gpt", "value": "The answer is PT = √725 or approximately 26.9258" } ], "clip_score": 30.609375, "loss": 2.387075662612915 }, { "id": "identity_122252", "image": "PMC-VQA/images/PMC7298960_Fig1_193832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of pathology in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.25, "loss": 2.6939382553100586 }, { "id": "identity_5385", "image": "PlotQA/images/66776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars are there on the 2nd tick from the bottom ?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 18.65625, "loss": 2.454285144805908 }, { "id": "identity_237727", "image": "VQA2.0/images/117349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are seated at the table, and what is the color of the knife handle on the table?" }, { "from": "gpt", "value": "The answer is four people, brown" } ], "clip_score": 21.078125, "loss": 3.8692047595977783 }, { "id": "identity_4003", "image": "FigureQA/images/20172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Royal Blue greater than Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.515625, "loss": 4.206762313842773 }, { "id": "identity_10867", "image": "A-OKVQA/images/384659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where might one of the cars be headed?\nChoices:\n(A) city\n(B) mountains\n(C) beach\n(D) country" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.5625, "loss": 4.2196736335754395 }, { "id": "identity_45521", "image": "DVQA/images/bar_train_00182776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value for 'effect' in the 'critic' category greater than the value for 'brief' in the 'attack' category?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.875, "loss": 3.332162380218506 }, { "id": "identity_81416", "image": "UniGeo/images/2379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC in the given image?" }, { "from": "gpt", "value": "The answer is cos^(-1)(2/3)" } ], "clip_score": 32.34375, "loss": 2.0679659843444824 }, { "id": "identity_25750", "image": "TextVQA/images/5fca28087c8a614e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what companies sponsor this event?" }, { "from": "gpt", "value": "The answer is nissan" } ], "clip_score": 15.5703125, "loss": 2.192422866821289 }, { "id": "identity_320405", "image": "A-OKVQA/images/304930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What OS is displayed on the screen of the laptop on the right?\nChoices:\n(A) windows xp\n(B) macos\n(C) windows 10\n(D) windows vista" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.8671875, "loss": 3.7235114574432373 }, { "id": "identity_185479", "image": "PMC-VQA/images/PMC6823093_FIG1_59236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text or label present in the image?" }, { "from": "gpt", "value": "The answer is Yes, \"Right ICA" } ], "clip_score": 17.96875, "loss": 4.738753318786621 }, { "id": "identity_92592", "image": "IconQA/images/56571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many inches does the nail extend beyond the 3-inch mark on the ruler?" }, { "from": "gpt", "value": "The answer is 0.75 inches" } ], "clip_score": 36.125, "loss": 1.5385897159576416 }, { "id": "identity_269423", "image": "FigureQA/images/85204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Periwinkle the highest in value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.015625, "loss": 4.52902889251709 }, { "id": "identity_290297", "image": "MapQA/images/map_4864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Louisiana the state with the highest value in the USA?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.359375, "loss": 2.638575315475464 }, { "id": "identity_117764", "image": "PMC-VQA/images/PMC5658478_Fig1_231972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of a mass or abnormal growth in the kidney region?" }, { "from": "gpt", "value": "The answer is Yes, there is evidence of a mass" } ], "clip_score": 27.578125, "loss": 1.9918222427368164 }, { "id": "identity_129148", "image": "A-OKVQA/images/512156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the STOP sign visible through the window?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 11.6875, "loss": 3.2558932304382324 }, { "id": "identity_216453", "image": "TextVQA/images/655f365c1423e681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the crayon located at the bottom right corner of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 25.125, "loss": 3.7788517475128174 }, { "id": "identity_221785", "image": "A-OKVQA/images/221004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bunches of bananas are visible on the left side of the image?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 28.203125, "loss": 2.3606114387512207 }, { "id": "identity_154586", "image": "ScienceQA/images/18634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is closest to the Gulf of Mexico?" }, { "from": "gpt", "value": "The answer is Florida" } ], "clip_score": 26.921875, "loss": 2.7337470054626465 }, { "id": "identity_276407", "image": "VQA2.0/images/508722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Would you consider this event to be large?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.765625, "loss": 3.5166420936584473 }, { "id": "identity_70847", "image": "AI2D/images/3122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color wire is connected to the brushes?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.9375, "loss": 2.209484338760376 }, { "id": "identity_303322", "image": "PMC-VQA/images/PMC8727688_f2_156113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where were the initial metastatic lesions found?\nChoices:\n(A) Left lower chest wall\n(B) Right lower chest wall\n(C) Upper chest wall\n(D) Spine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.984375, "loss": 4.513482570648193 }, { "id": "identity_201463", "image": "PMC-VQA/images/PMC2375410_fig4_22352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are the arrowheads pointing to in the image?" }, { "from": "gpt", "value": "The answer is Smooth muscle cells" } ], "clip_score": 28.5625, "loss": 3.1428658962249756 }, { "id": "identity_316431", "image": "VQA2.0/images/160325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this some kind of carriage?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.421875, "loss": 3.4833571910858154 }, { "id": "identity_274930", "image": "PlotQA/images/104916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the values on the major ticks of the Y-axis use scientific E-notation?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.640625, "loss": 3.353188991546631 }, { "id": "identity_38270", "image": "GeoQA+/images/4902.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB为⊙O的直径,C、D是⊙O上两点,若∠ABC=50°,则∠D的度数为()\nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 2.768867015838623 }, { "id": "identity_263197", "image": "DocVQA/images/rqxf0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the document?" }, { "from": "gpt", "value": "The answer is PUBLIC HEALTH AND PREVENTIVE MEDICINE" } ], "clip_score": 23.375, "loss": 1.032625675201416 }, { "id": "identity_48363", "image": "DVQA/images/bar_train_00163214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithm 'sitter' across all datasets?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 34.46875, "loss": 2.5030910968780518 }, { "id": "identity_25102", "image": "PMC-VQA/images/PMC3677018_fig1_210063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structures are shown by the red arrows in the image?\nChoices:\n(A) Cysts in the right testis \n(B) Cysts in the left testis \n(C) Cysts in the external iliac artery \n(D) Cysts in the obturator canal " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.71875, "loss": 4.290298938751221 }, { "id": "identity_246455", "image": "TextVQA/images/5132f0a0c25030bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the monitor and the headphones?" }, { "from": "gpt", "value": "The answer is Bose and BenQ" } ], "clip_score": 19.34375, "loss": 2.9631752967834473 }, { "id": "identity_123521", "image": "PMC-VQA/images/PMC4129165_fig3_312859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measurement unit used in this image?" }, { "from": "gpt", "value": "The answer is Centimeters" } ], "clip_score": 21.640625, "loss": 2.834963083267212 }, { "id": "identity_51067", "image": "DocVQA/images/jggn0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the patient's height?" }, { "from": "gpt", "value": "The answer is 76 inches" } ], "clip_score": 19.046875, "loss": 2.399211883544922 }, { "id": "identity_91678", "image": "Geometry3K/images/103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle KJL?" }, { "from": "gpt", "value": "The answer is 59 degrees" } ], "clip_score": 35.625, "loss": 2.6704773902893066 }, { "id": "identity_59744", "image": "DVQA/images/bar_train_00137713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest number of units sold in the 'ticket' category?" }, { "from": "gpt", "value": "The answer is pause" } ], "clip_score": 29.015625, "loss": 3.8201560974121094 }, { "id": "identity_19048", "image": "DVQA/images/bar_train_00152671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.8125, "loss": 3.4859843254089355 }, { "id": "identity_41808", "image": "DocVQA/images/jnjm0223_59.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any studies with the status \"Completed\"?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 11.6796875, "loss": 3.2717576026916504 }, { "id": "identity_255296", "image": "TabMWP/images/14929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Suzie need to buy two books signed by a famous author, three CDs signed by a famous band, and one program signed by a famous dancer?" }, { "from": "gpt", "value": "The answer is 269.13" } ], "clip_score": 30.65625, "loss": 2.623148202896118 }, { "id": "identity_75042", "image": "TQA/images/question_images/parts_microscope_7180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the diaphragm on a microscope?" }, { "from": "gpt", "value": "The answer is It controls light intensity" } ], "clip_score": 32.46875, "loss": 3.520569086074829 }, { "id": "identity_301792", "image": "TextVQA/images/4201d4324a31719b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which cellular company in orange is shown here?" }, { "from": "gpt", "value": "The answer is at&t" } ], "clip_score": 18.78125, "loss": 2.350280284881592 }, { "id": "identity_312338", "image": "FigureQA/images/24783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Salmon the lower value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.875, "loss": 4.003578186035156 }, { "id": "identity_149320", "image": "DocVQA/images/sykk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which publication has the earliest issue date mentioned?" }, { "from": "gpt", "value": "The answer is The Sugarbeet, Summer 1972" } ], "clip_score": 29.046875, "loss": 2.8650405406951904 }, { "id": "identity_30026", "image": "A-OKVQA/images/523349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The instrument in the picture is used to play for?\nChoices:\n(A) snowboarding\n(B) skiing\n(C) surfing\n(D) skating" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.578125, "loss": 4.061620235443115 }, { "id": "identity_192251", "image": "PMC-VQA/images/PMC5241658_f5_115343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of coils visible in the SEM image?" }, { "from": "gpt", "value": "The answer is Five coils" } ], "clip_score": 29.625, "loss": 3.20965838432312 }, { "id": "identity_1293", "image": "GeoQA+/images/2157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB为⊙O的切线,点A为切点,OB交⊙O于点C,点D在⊙O上,连接AD、CD,OA,若∠ABO=20°,则∠ADC的度数为()\nChoices:\n(A) 20°\n(B) 30°\n(C) 35°\n(D) 40°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.390625, "loss": 2.66571044921875 }, { "id": "identity_185006", "image": "TextVQA/images/000209d575f3aa4f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 10.5390625, "loss": 3.475191831588745 }, { "id": "identity_288823", "image": "PMC-VQA/images/PMC8124899_ijms-22-04667-f009_442190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the images provided, which regions of the hippocampus are depicted?\nChoices:\n(A) CA1 and CA3b\n(B) CA1 and CA3c\n(C) CA2 and CA3b\n(D) CA2 and CA3c" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 3.533466339111328 }, { "id": "identity_192775", "image": "VizWiz/images/VizWiz_train_00023869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a reflection on the laptop screen?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.0625, "loss": 2.882765293121338 }, { "id": "identity_302803", "image": "GeoQA+/images/1784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,把△ABC纸片沿DE折叠,当点C落在某个四边形的外部时,此时测得∠1=108°,∠C=35°,则∠2的度数为()\n\nChoices:\n(A) 35°\n(B) 36°\n(C) 37°\n(D) 38°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.3125, "loss": 2.358179807662964 }, { "id": "identity_296928", "image": "PMC-VQA/images/PMC7773283_pone.0244090.g002_332399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the mid-sagittal high-resolution T2-weighted MRI display?\nChoices:\n(A) Axial PC-MRI \n(B) The entire SAS geometry \n(C) CSF velocity profiles \n(D) A representative cynomolgus monkey." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.65625, "loss": 3.797916889190674 }, { "id": "identity_330308", "image": "GeoQA+/images/544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在一个直角三角形中,∠C=90°,AB=10,AD平分∠BAC交边BC于D点.若CD=3,则△ABD的面积为()\n选项:\n(A) 15\n(B) 30\n(C) 10\n(D) 20" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.1875, "loss": 2.35897159576416 }, { "id": "identity_136438", "image": "DocVQA/images/sqcy0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the moderator for the Opening Plenary Session on August 15?" }, { "from": "gpt", "value": "The answer is Germain J. Brisson, Ph.D" } ], "clip_score": 27.046875, "loss": 2.1375317573547363 }, { "id": "identity_183403", "image": "PMC-VQA/images/PMC8616551_cancers-13-05745-f006_120190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification scale indicated by the black bar in the middle image?" }, { "from": "gpt", "value": "The answer is 50 micrometers" } ], "clip_score": 19.703125, "loss": 2.2147140502929688 }, { "id": "identity_241491", "image": "VQA2.0/images/529860.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the dog's fur and is it licking its nose?" }, { "from": "gpt", "value": "The answer is The dog's fur is light brown, and it is licking its nose" } ], "clip_score": 22.296875, "loss": 1.4302973747253418 }, { "id": "identity_306060", "image": "GeoQA+/images/3480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,▱ABCD的面积是8,对角线AC、DB交于点O,EF过点O分别交AD、BC于E、F,则阴影部分的面积是多少?\n选项:\n(A) 4\n(B) 2\n(C) 6\n(D) 无法确定" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.59375, "loss": 2.3340225219726562 }, { "id": "identity_178082", "image": "IconQA/images/24501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper clips, laid end to end, would it take to reach the length of two lines combined?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.65625, "loss": 2.1168975830078125 }, { "id": "identity_191657", "image": "A-OKVQA/images/174188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas are suspended in the air?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 21.59375, "loss": 2.2613525390625 }, { "id": "identity_115343", "image": "VQA2.0/images/574349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals lying down in the image?" }, { "from": "gpt", "value": "The answer is Yes, one animal" } ], "clip_score": 22.171875, "loss": 4.290745735168457 }, { "id": "identity_277176", "image": "GeoQA+/images/5039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given figure, if the central angle ∠BOC is 78°, what is the degree measure of the circumference angle ∠BAC?\nChoices:\n(A) 156°\n(B) 78°\n(C) 39°\n(D) 12°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.9375, "loss": 3.062648057937622 }, { "id": "identity_181521", "image": "CLEVR-Math/images/CLEVR_train_008868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the purple cube?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.765625, "loss": 2.742990255355835 }, { "id": "identity_147937", "image": "DocVQA/images/fsph0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of favors will be given out at the event?" }, { "from": "gpt", "value": "The answer is Pamphlet, sugar packet, shopping bags, Polaroid pictures" } ], "clip_score": 20.453125, "loss": 3.5490760803222656 }, { "id": "identity_308119", "image": "PMC-VQA/images/PMC8272411_fig-2_7328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue arrow indicate in the chest CT reconstruction?\nChoices:\n(A) Fistulous connection \n(B) Lung inflammation \n(C) Heart attack \n(D) Pseudoaneurysm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.96875, "loss": 5.1175127029418945 }, { "id": "identity_102912", "image": "CLEVR-Math/images/CLEVR_train_043427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the green cylinder?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 27.40625, "loss": 2.785832405090332 }, { "id": "identity_221354", "image": "A-OKVQA/images/414552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the curtains in the room?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 17.96875, "loss": 3.7152099609375 }, { "id": "identity_193121", "image": "PMC-VQA/images/PMC7808904_fig0020_343220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique is used in this histological image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) staining" } ], "clip_score": 33.28125, "loss": 0.9503145813941956 }, { "id": "identity_74636", "image": "ScienceQA/images/6870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is located to the west of the state marked with a yellow dot?" }, { "from": "gpt", "value": "The answer is Pacific Ocean" } ], "clip_score": 31.84375, "loss": 2.8439388275146484 }, { "id": "identity_196162", "image": "PMC-VQA/images/PMC7068089_f2_132748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the lesion in Hounsfield units (HU)?" }, { "from": "gpt", "value": "The answer is Specific HU measurement based on the CT scan software's reading" } ], "clip_score": 30.53125, "loss": 3.677163600921631 }, { "id": "identity_229375", "image": "UniGeo/images/322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C在⊙O上,CO的延长线交AB于点D,BD=BO,∠A=50°,求∠C的度数。" }, { "from": "gpt", "value": "The answer is 110°" } ], "clip_score": 32.15625, "loss": 1.817624807357788 }, { "id": "identity_13792", "image": "UniGeo/images/1247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,P为⊙O外一点,PA、PB分别切⊙O于A、B两点,连接AB、OP,OP与⊙O交于点D,连接BD,若∠APB=56°,则∠ABD为()\nChoices:\n(A) 28°\n(B) 30°\n(C) 31°\n(D) 34°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.796875, "loss": 2.5674564838409424 }, { "id": "identity_256073", "image": "DocVQA/images/srbm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did she complete her M.D. and how many years after that did she complete her M.S.?" }, { "from": "gpt", "value": "The answer is 1932 and 4 years" } ], "clip_score": 16.46875, "loss": 1.8517184257507324 }, { "id": "identity_286699", "image": "A-OKVQA/images/451389.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the smallest number of players required to participate in a game of this sport?\nChoices:\n(A) eight\n(B) four\n(C) two\n(D) three" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.5625, "loss": 4.006043434143066 }, { "id": "identity_15149", "image": "VQA2.0/images/508722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a large event?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.890625, "loss": 3.19960355758667 }, { "id": "identity_123419", "image": "A-OKVQA/images/482265.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the plate in the image?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 14.9453125, "loss": 3.8012454509735107 }, { "id": "identity_253674", "image": "A-OKVQA/images/341838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the architectural style, language on the signs, and cultural decorations, identify the predominant cultural influence in this area and explain how these elements contribute to your conclusion" }, { "from": "gpt", "value": "The answer is Chinese" } ], "clip_score": 20.65625, "loss": 4.230651378631592 }, { "id": "identity_147278", "image": "PlotQA/images/39443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage of population using sanitation services between Ecuador and Small states in 2003?" }, { "from": "gpt", "value": "The answer is Approximately 10%" } ], "clip_score": 28.0, "loss": 2.172086477279663 }, { "id": "identity_112742", "image": "VQA2.0/images/574951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 11.6171875, "loss": 3.1544482707977295 }, { "id": "identity_296892", "image": "TabMWP/images/30481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among Devon, Cassie, Kristen, and Harold, who achieved the lowest score in the bowling game played by the friends?\nChoices:\n(A) Devon\n(B) Cassie\n(C) Kristen\n(D) Harold" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 40.15625, "loss": 3.950012683868408 }, { "id": "identity_157953", "image": "ScienceQA/images/13377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 31.15625, "loss": 2.9907636642456055 }, { "id": "identity_211914", "image": "VQA-AS/images/15816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the chair's upholstery?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.0, "loss": 3.483882188796997 }, { "id": "identity_192911", "image": "PMC-VQA/images/PMC7194504_f2_161884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a single infarct or multiple infarcts visible in the image?" }, { "from": "gpt", "value": "The answer is Multiple infarcts" } ], "clip_score": 27.765625, "loss": 2.3634166717529297 }, { "id": "identity_41911", "image": "FigureQA/images/62257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the slope of the Khaki line positive or negative?" }, { "from": "gpt", "value": "The answer is Negative" } ], "clip_score": 25.40625, "loss": 2.9122154712677 }, { "id": "identity_211153", "image": "PMC-VQA/images/PMC5944512_F4_312494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color seen in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.4375, "loss": 3.327441453933716 }, { "id": "identity_314887", "image": "TQA/images/question_images/cycle_rock_6742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the initial step in forming a sedimentary rock from an igneous or metamorphic rock?\nChoices:\n(A) Cooling\n(B) Melting\n(C) Compaction and cementation\n(D) Weathering and erosion" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.0, "loss": 4.553164005279541 }, { "id": "identity_84621", "image": "UniGeo/images/2715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram of rectangle ABCD, if the length of diagonal BD is to be found, what is the length of BD given that the area of the rectangle is 8?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.40625, "loss": 2.290095090866089 }, { "id": "identity_330688", "image": "PMC-VQA/images/PMC7939202_F1_385981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the hypointensities located on this MRI?\nChoices:\n(A) Basal ganglia\n(B) Hippocampus\n(C) Corticospinal tracts\n(D) Corpus callosum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.28125, "loss": 4.254520416259766 }, { "id": "identity_260392", "image": "TabMWP/images/12450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for 3 pounds of lemon sours, 2 pounds of yogurt-covered peanuts, 5 pounds of chocolate-covered raisins, 1 pound of gummy bears, and 4 pounds of chocolates?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 31.59375, "loss": 3.1501071453094482 }, { "id": "identity_320012", "image": "VQA2.0/images/373697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people at the table?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.453125, "loss": 3.617929458618164 }, { "id": "identity_321833", "image": "VQA-AS/images/15475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person holding?" }, { "from": "gpt", "value": "The answer is bear" } ], "clip_score": 18.53125, "loss": 3.6589646339416504 }, { "id": "identity_322010", "image": "A-OKVQA/images/439614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the name of the boat come from?\nChoices:\n(A) germany\n(B) india\n(C) mexico\n(D) japanese" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.3125, "loss": 4.04464054107666 }, { "id": "identity_118371", "image": "PMC-VQA/images/PMC5606599_vetsci-04-00020-f007_217796.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the abbreviation 'Cx' stand for in this image?" }, { "from": "gpt", "value": "The answer is Circumflex Artery" } ], "clip_score": 31.703125, "loss": 2.546067476272583 }, { "id": "identity_52486", "image": "FigureQA/images/57327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color occupies more area in the pie chart, Olive or Maroon?" }, { "from": "gpt", "value": "The answer is Maroon" } ], "clip_score": 44.25, "loss": 2.2782657146453857 }, { "id": "identity_336480", "image": "A-OKVQA/images/85284.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this place located?\nChoices:\n(A) culver city\n(B) portland\n(C) chicago\n(D) los angeles" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.125, "loss": 3.792034149169922 }, { "id": "identity_129515", "image": "VQA2.0/images/423023.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the tablecloth on the table in the foreground?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.3125, "loss": 4.029624938964844 }, { "id": "identity_300494", "image": "AI2D/images/3313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What number is indicated by label B?\nChoices:\n(A) 5\n(B) 3\n(C) 6\n(D) 7" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.1640625, "loss": 4.0982866287231445 }, { "id": "identity_313818", "image": "Super-CLEVR/images/superCLEVR_new_000843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny minivan of the same color as the shiny bike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.9375, "loss": 5.360260486602783 }, { "id": "identity_275922", "image": "PMC-VQA/images/PMC7156185_F2_151427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the axial cut at the level of the static screw hole, which bone was imaged?\nChoices:\n(A) left distal femur\n(B) right proximal femur\n(C) right distal femur\n(D) left proximal femur" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.75, "loss": 4.588298797607422 }, { "id": "identity_217330", "image": "A-OKVQA/images/385492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many monitors are on the desk?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.75, "loss": 2.041916847229004 }, { "id": "identity_236078", "image": "TQA/images/question_images/cycle_rock_6737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the process by which sedimentary rocks can transform into metamorphic rocks and then into igneous rocks, including the conditions required for each transformation" }, { "from": "gpt", "value": "The answer is Heat, pressure, melting, and cooling" } ], "clip_score": 30.828125, "loss": 2.90140438079834 }, { "id": "identity_168310", "image": "IconQA/images/12724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of squares to circles in the image?" }, { "from": "gpt", "value": "The answer is 4:1" } ], "clip_score": 20.65625, "loss": 1.7544233798980713 }, { "id": "identity_159990", "image": "GeoQA+/images/2269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given rhombus ABCD with BD = 8 and area = 24, what is the altitude of the rhombus?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 37.75, "loss": 2.5915019512176514 }, { "id": "identity_301025", "image": "DocVQA/images/gykb0019_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day was the meeting held for the proposed amendments?" }, { "from": "gpt", "value": "The answer is june 27 , 2002" } ], "clip_score": 21.1875, "loss": 2.3259756565093994 }, { "id": "identity_164315", "image": "GeoQA+/images/2548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In parallelogram ABCD, if angle A is 150 degrees, what is the measure of angle D?" }, { "from": "gpt", "value": "The answer is 150 degrees" } ], "clip_score": 33.78125, "loss": 1.5046204328536987 }, { "id": "identity_302361", "image": "PMC-VQA/images/PMC9042937_Fig3_265881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is marked in green in this image?\nChoices:\n(A) Nuclei \n(B) Vascular endothelium \n(C) Synapses \n(D) Axons" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.453125, "loss": 4.419508457183838 }, { "id": "identity_177999", "image": "TabMWP/images/15813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries have an even number of athletes?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.609375, "loss": 2.590113878250122 }, { "id": "identity_131313", "image": "TextVQA/images/7f44e2b5ba2db7f2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is displayed on the refrigerator as a feature of its design?" }, { "from": "gpt", "value": "The answer is 2010" } ], "clip_score": 32.53125, "loss": 1.3321518898010254 }, { "id": "identity_163146", "image": "GeoQA+/images/3211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠EOC = 35°, what is the measure of ∠COD?" }, { "from": "gpt", "value": "The answer is 145°" } ], "clip_score": 30.6875, "loss": 1.2548933029174805 }, { "id": "identity_222353", "image": "VQA2.0/images/465045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the stripes on the train?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.296875, "loss": 3.339177370071411 }, { "id": "identity_246512", "image": "VizWiz/images/VizWiz_train_00011475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves are visible on the plant in the flower vase?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 24.875, "loss": 2.3747241497039795 }, { "id": "identity_200123", "image": "VQA2.0/images/473384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus in the foreground?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.40625, "loss": 3.2102959156036377 }, { "id": "identity_3914", "image": "FigureQA/images/71512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Cyan intersect Navy Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.171875, "loss": 3.612400770187378 }, { "id": "identity_26311", "image": "IconQA/images/104377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a bear. Which picture is eighth?\nChoices:\n(A) plane\n(B) train\n(C) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.640625, "loss": 3.888216018676758 }, { "id": "identity_186183", "image": "PMC-VQA/images/PMC3716634_F3_218838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the arrow pointing to a specific area in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 17.515625, "loss": 3.723633289337158 }, { "id": "identity_330471", "image": "PMC-VQA/images/PMC6922484_F3_87491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which level of the cervical spine is this image taken?\nChoices:\n(A) C1 level\n(B) C2 level\n(C) C3 level\n(D) C4 level" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.078125, "loss": 3.8946003913879395 }, { "id": "identity_95875", "image": "TabMWP/images/5773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of straight-A students from 2008 to 2012?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.328125, "loss": 2.6076414585113525 }, { "id": "identity_137689", "image": "FigureQA/images/66366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color line intersects with Midnight Blue at approximately x-axis label 60?" }, { "from": "gpt", "value": "The answer is Light Seafoam" } ], "clip_score": 28.6875, "loss": 2.2446327209472656 }, { "id": "identity_332045", "image": "UniGeo/images/2154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在一个四边形中,AB=3,BC=5,∠A=130°,∠D=100°,AD=CD.若点E,F分别是边AD,CD的中点,则EF的长是()\n\nChoices:\n(A) √{2}\n(B) √{3}\n(C) 2\n(D) √{5}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.546875, "loss": 2.531555652618408 }, { "id": "identity_138669", "image": "DocVQA/images/tfmg0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of this fax transmission?" }, { "from": "gpt", "value": "The answer is 5/26/98" } ], "clip_score": 34.96875, "loss": 2.654428482055664 }, { "id": "identity_74783", "image": "ScienceQA/images/19997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries on this map share a border with the highlighted country?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.984375, "loss": 2.524569272994995 }, { "id": "identity_2612", "image": "DVQA/images/bar_train_00114441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people like the most preferred object in the whole chart?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 30.4375, "loss": 2.2168619632720947 }, { "id": "identity_177533", "image": "TabMWP/images/32852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of seashells found by all the people combined?" }, { "from": "gpt", "value": "The answer is 142 seashells" } ], "clip_score": 36.65625, "loss": 1.9014438390731812 }, { "id": "identity_132049", "image": "A-OKVQA/images/151648.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tag on the cow's ear?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 25.71875, "loss": 2.75494647026062 }, { "id": "identity_138261", "image": "DocVQA/images/fpbw0217_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total budget for the Medical Congress Interactive Satellite Symposium?" }, { "from": "gpt", "value": "The answer is $1,200,000" } ], "clip_score": 29.0, "loss": 1.3051903247833252 }, { "id": "identity_175771", "image": "IconQA/images/61785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many circles are in the second row?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.640625, "loss": 2.4427530765533447 }, { "id": "identity_333825", "image": "IconQA/images/50831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the count of these flowers even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 4.0683183670043945 }, { "id": "identity_100682", "image": "TabMWP/images/38167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the wait times?" }, { "from": "gpt", "value": "The answer is 65 minutes" } ], "clip_score": 28.734375, "loss": 3.246964931488037 }, { "id": "identity_122346", "image": "A-OKVQA/images/67015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is happening in the background?" }, { "from": "gpt", "value": "The answer is Painting" } ], "clip_score": 16.546875, "loss": 3.049574613571167 }, { "id": "identity_123074", "image": "VQA2.0/images/377465.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual flowers can be counted in the vase?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 28.640625, "loss": 2.152777671813965 }, { "id": "identity_302930", "image": "UniGeo/images/1084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD内接于⊙O,E为DC延长线上一点,∠A=50°,则∠BCE的度数为多少?\n选项:\n(A) 40°\n(B) 50°\n(C) 60°\n(D) 130°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.8125, "loss": 2.571688175201416 }, { "id": "identity_329145", "image": "TabMWP/images/31864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many participants did exactly 65 push-ups?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 29.546875, "loss": 2.190617322921753 }, { "id": "identity_248352", "image": "IconQA/images/106475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many vertices are there in total for all the shapes combined?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 29.296875, "loss": 1.6447268724441528 }, { "id": "identity_276103", "image": "PlotQA/images/144099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the total of net taxes on products in 1961 and 1960 exceed the total of net taxes on products in 1963 and 1962 in every country?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.328125, "loss": 3.232558488845825 }, { "id": "identity_244356", "image": "GeoQA+/images/1918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若ABCD边AB=6cm,AD=8cm,∠A=120°,DE平分∠ADC,求四边形ABCD的面积。" }, { "from": "gpt", "value": "The answer is 24√3 cm²" } ], "clip_score": 30.59375, "loss": 1.8487216234207153 }, { "id": "identity_307110", "image": "ScienceQA/images/16998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Fill in the blank to describe the diagram.\nSolute particles moved in both directions across the permeable membrane. But more solute particles moved across the membrane (). When there was an equal concentration on both sides, the particles reached equilibrium.\nChoices:\n(A) to the left than to the right\n(B) to the right than to the left" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.765625, "loss": 3.207312822341919 }, { "id": "identity_80985", "image": "GeoQA+/images/1862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, if AC is extended to D such that AC = 3CD and DH is drawn parallel to AB intersecting the extended line of BC at H, what is the length of segment DH if BC = 3?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 35.46875, "loss": 2.6917836666107178 }, { "id": "identity_75278", "image": "TQA/images/question_images/parts_worm_7327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many body segments are visible in the earthworm diagram?" }, { "from": "gpt", "value": "The answer is Count the number of segments" } ], "clip_score": 35.0625, "loss": 3.281592845916748 }, { "id": "identity_271149", "image": "DocVQA/images/gsfm0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the document?" }, { "from": "gpt", "value": "The answer is Child Nutrition Programs" } ], "clip_score": 34.03125, "loss": 1.4291495084762573 }, { "id": "identity_257841", "image": "A-OKVQA/images/389021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key elements in the large sign that indicate its purpose is advertisement, and how do these elements collectively convey this purpose?" }, { "from": "gpt", "value": "The answer is Product display, brand logo, visual appeal, call to action" } ], "clip_score": 17.796875, "loss": 3.732656955718994 }, { "id": "identity_81215", "image": "GeoQA+/images/2420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side AD in the diagram?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 27.75, "loss": 2.9869322776794434 }, { "id": "identity_77678", "image": "AI2D/images/4305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the belt located between Mars and Jupiter?" }, { "from": "gpt", "value": "The answer is Asteroid belt" } ], "clip_score": 31.828125, "loss": 1.951396107673645 }, { "id": "identity_230668", "image": "FigureQA/images/25333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the percentages of Dark Gold and Tomato, and how does it compare to the percentage of Steel Blue?" }, { "from": "gpt", "value": "The answer is 90% is greater than 10%" } ], "clip_score": 40.25, "loss": 2.633344888687134 }, { "id": "identity_118180", "image": "Super-CLEVR/images/superCLEVR_new_000957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there an object in the image that is both larger than the small gray bus and has a matte finish?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.828125, "loss": 3.0672857761383057 }, { "id": "identity_331328", "image": "DocVQA/images/zpyf0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the estimated budget?" }, { "from": "gpt", "value": "The answer is $600" } ], "clip_score": 18.328125, "loss": 2.530608654022217 }, { "id": "identity_19743", "image": "AI2D/images/1324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, if all plants dies then mice population\nChoices:\n(A) decrease\n(B) remains the same\n(C) increase\n(D) NA" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.9375, "loss": 3.758596420288086 }, { "id": "identity_186681", "image": "PMC-VQA/images/PMC8570958_fig0001_104651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of abnormal metabolic activity in these images?" }, { "from": "gpt", "value": "The answer is Yes, there are signs of abnormal metabolic activity" } ], "clip_score": 22.375, "loss": 1.5101275444030762 }, { "id": "identity_171739", "image": "IconQA/images/47623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares tall is the letter after the transformation?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.046875, "loss": 2.20798397064209 }, { "id": "identity_82284", "image": "Geometry3K/images/1475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given m \\angle 3 = 110 and m \\angle 12 = 55, find the measure of \\angle 10" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 28.96875, "loss": 2.2936153411865234 }, { "id": "identity_245548", "image": "Super-CLEVR/images/superCLEVR_new_001927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color and size of the object that is to the left of the shiny bus, which is to the left of the small green shiny car?" }, { "from": "gpt", "value": "The answer is large and gray" } ], "clip_score": 30.59375, "loss": 3.079317808151245 }, { "id": "identity_153494", "image": "TQA/images/abc_question_images/parts_neuron_17226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled D?" }, { "from": "gpt", "value": "The answer is Myelin sheath" } ], "clip_score": 32.0, "loss": 2.6432950496673584 }, { "id": "identity_222673", "image": "VQA2.0/images/465975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of water body is shown in the image?" }, { "from": "gpt", "value": "The answer is River" } ], "clip_score": 20.6875, "loss": 3.778614044189453 }, { "id": "identity_286497", "image": "UniGeo/images/3228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, with side BC = 6 and height AD = 4, square EFGH is constructed with vertices E and F on side BC, and vertices H and G on sides AB and AC respectively. What is the length of a side of this square?\n(A) 3\n(B) 2.5\n(C) 2.4\n(D) 2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.46875, "loss": 2.6710853576660156 }, { "id": "identity_325880", "image": "PMC-VQA/images/PMC4371620_Fig5_370954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the numbers in the image indicate?\nChoices:\n(A) Temperature in degrees Celsius \n(B) Distance in micrometers \n(C) Time in hours \n(D) Depth in millimeters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.5, "loss": 5.174915313720703 }, { "id": "identity_30294", "image": "ScienceQA/images/4363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After the Akkadian Empire ended, many cities and empires tried to control Mesopotamia. Around the 1790s BCE, which empire started controlling Mesopotamia?\nChoices:\n(A) the Akkadian Empire\n(B) the Elamite Empire\n(C) the Babylonian Empire\n(D) the Neo-Sumerian Empire" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.8125, "loss": 4.051966190338135 }, { "id": "identity_132709", "image": "VQA-AS/images/8711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many logs are visible in the fireplace?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.046875, "loss": 4.264107704162598 }, { "id": "identity_217789", "image": "A-OKVQA/images/84499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the setting of the image?" }, { "from": "gpt", "value": "The answer is A street in a rural or semi-urban area" } ], "clip_score": 21.328125, "loss": 2.737435817718506 }, { "id": "identity_310763", "image": "ScienceQA/images/336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Boise\n(B) Topeka\n(C) Juneau\n(D) Buffalo" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5625, "loss": 4.857794284820557 }, { "id": "identity_278736", "image": "TabMWP/images/28795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of tickets sold at the aquarium in the past 7 days, based on their records?" }, { "from": "gpt", "value": "The answer is 72" } ], "clip_score": 31.125, "loss": 2.438251495361328 }, { "id": "identity_281217", "image": "A-OKVQA/images/253250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which part of the boat is the person in the red shirt standing? \n(A) bow \n(B) stern \n(C) transom \n(D) hull" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.453125, "loss": 4.72225284576416 }, { "id": "identity_160430", "image": "Geometry3K/images/1091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the side of the square?" }, { "from": "gpt", "value": "The answer is 17sqrt(2) / 2" } ], "clip_score": 35.84375, "loss": 2.1230854988098145 }, { "id": "identity_128158", "image": "PMC-VQA/images/PMC6933696_Fig3_91310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are visible on either side of the spine?" }, { "from": "gpt", "value": "The answer is Lungs" } ], "clip_score": 23.28125, "loss": 2.7818219661712646 }, { "id": "identity_88111", "image": "GeoQA+/images/4203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average depth of the swimming pool?" }, { "from": "gpt", "value": "The answer is 1.7 meters" } ], "clip_score": 28.046875, "loss": 2.247607707977295 }, { "id": "identity_4971", "image": "IconQA/images/91694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 10:00\n(B) 2:00" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 4.145017147064209 }, { "id": "identity_171810", "image": "TabMWP/images/13307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average quantity demanded for the given prices?" }, { "from": "gpt", "value": "The answer is 19,500" } ], "clip_score": 30.96875, "loss": 1.9367237091064453 }, { "id": "identity_93126", "image": "TabMWP/images/21064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Anna wants to buy one of each item, how much money does she need in total?" }, { "from": "gpt", "value": "The answer is $23" } ], "clip_score": 16.53125, "loss": 2.136866569519043 }, { "id": "identity_237052", "image": "DocVQA/images/txdw0217_35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the table in the document and how many different treatments are listed in the table?" }, { "from": "gpt", "value": "The answer is Effects of Various Treatments on Vasomotor Symptoms: Randomized, Double-Blind, Placebo-Controlled Continuum, 3" } ], "clip_score": 22.625, "loss": 2.3614916801452637 }, { "id": "identity_191244", "image": "A-OKVQA/images/554636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus on the right side of the image?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 30.734375, "loss": 2.848234176635742 }, { "id": "identity_292865", "image": "AI2D/images/3032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the innermost layer from the following options:\n(A) cambium\n(B) xylem\n(C) Pith\n(D) phloem" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.15625, "loss": 4.097686290740967 }, { "id": "identity_297045", "image": "UniGeo/images/1997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, given that AB is parallel to CD and EF, AC = 4, CE = 1, BD = 3, what is the value of DF?\nChoices:\n(A) 1/2\n(B) 4/3\n(C) 3/4\n(D) 1" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.40625, "loss": 2.6114163398742676 }, { "id": "identity_273535", "image": "IconQA/images/16359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the line in centimeters when measured to the nearest centimeter using the ruler?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 32.34375, "loss": 3.0800583362579346 }, { "id": "identity_44725", "image": "FigureQA/images/2724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.96875, "loss": 2.4504494667053223 }, { "id": "identity_13444", "image": "TextVQA/images/df7966f0d306102e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what sport is on the shirt?" }, { "from": "gpt", "value": "The answer is lacrosse" } ], "clip_score": 27.234375, "loss": 2.768874406814575 }, { "id": "identity_253073", "image": "A-OKVQA/images/576650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are riding bicycles compared to those walking, and what is the ratio of cyclists to pedestrians?" }, { "from": "gpt", "value": "The answer is 1.25" } ], "clip_score": 26.328125, "loss": 2.9187545776367188 }, { "id": "identity_165617", "image": "Geometry3K/images/105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side y in the triangle?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 34.75, "loss": 2.452772617340088 }, { "id": "identity_10635", "image": "Geometry3K/images/419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use a Pythagorean Triple to find x.\nChoices:\n(A) 32\n(B) 34\n(C) 36\n(D) 38" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.84375, "loss": 3.246351718902588 }, { "id": "identity_315495", "image": "TQA/images/question_images/parts_fish_2905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fin on a fish aids in forward propulsion?\nChoices:\n(A) Caudal fin\n(B) Pectoral fin\n(C) Pelvic fin\n(D) Anal fin" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.3125, "loss": 3.9883408546447754 }, { "id": "identity_295734", "image": "MapQA/images/map_10995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Oklahoma have the highest value among the states bordering Missouri?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.671875, "loss": 2.531229019165039 }, { "id": "identity_129138", "image": "A-OKVQA/images/265799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rearview mirrors does the motorcycle have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.28125, "loss": 3.988887071609497 }, { "id": "identity_136150", "image": "MapQA/images/map_5338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with a number of neighbors in the range of 295-518?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.453125, "loss": 2.7849810123443604 }, { "id": "identity_205300", "image": "VizWiz/images/VizWiz_train_00007805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible door handle on the door?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.109375, "loss": 3.154158115386963 }, { "id": "identity_47965", "image": "DocVQA/images/fqyn0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the document?" }, { "from": "gpt", "value": "The answer is 2-26-88" } ], "clip_score": 18.515625, "loss": 2.381911039352417 }, { "id": "identity_11974", "image": "GeoQA+/images/22.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在平行四边形ABCD中,∠A=60°,则∠C的大小是()\nChoices:\n(A) 120°\n(B) 60°\n(C) 45°\n(D) 30°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.34375, "loss": 3.0326294898986816 }, { "id": "identity_62117", "image": "FigureQA/images/55416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the graph?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 22.921875, "loss": 3.748678684234619 }, { "id": "identity_228113", "image": "UniGeo/images/2633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 在平行四边形ABCD中,对角线AC和BD交于点O,E是CD的中点,连接OE,若AD=5,CD=4,且AB=6,则三角形AOB的面积是多少?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 22.75, "loss": 2.821143865585327 }, { "id": "identity_182663", "image": "TextVQA/images/5805fe7db047c76a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cap worn by the person in the foreground?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 26.828125, "loss": 3.52911639213562 }, { "id": "identity_52076", "image": "FigureQA/images/70575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Burlywood have any data points below 60?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.328125, "loss": 2.9732260704040527 }, { "id": "identity_166751", "image": "TabMWP/images/16692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of oars purchased by Frank over the two years?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 22.234375, "loss": 2.0683577060699463 }, { "id": "identity_51338", "image": "FigureQA/images/83071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value on the y-axis label for Chartreuse?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 26.203125, "loss": 2.5741331577301025 }, { "id": "identity_128613", "image": "PMC-VQA/images/PMC4660938_F0003_448112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any lymph nodes be identified in the image?" }, { "from": "gpt", "value": "The answer is No lymph nodes are clearly identifiable" } ], "clip_score": 26.75, "loss": 2.50331974029541 }, { "id": "identity_139645", "image": "MapQA/images/map_5614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any state with revenue less than 524?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.875, "loss": 2.7653682231903076 }, { "id": "identity_211430", "image": "A-OKVQA/images/188955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the ski jackets?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 25.03125, "loss": 3.804290294647217 }, { "id": "identity_152016", "image": "ScienceQA/images/15715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the flower pot?" }, { "from": "gpt", "value": "The answer is Cylindrical" } ], "clip_score": 36.4375, "loss": 2.3556811809539795 }, { "id": "identity_143604", "image": "DocVQA/images/fpmj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much is the Printed Materials Fund?" }, { "from": "gpt", "value": "The answer is $810.00" } ], "clip_score": 21.78125, "loss": 1.9590741395950317 }, { "id": "identity_165345", "image": "GeoQA+/images/4549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O, ∠DAB = 70°, and AD is perpendicular to the tangent at C, what is the measure of ∠CDB?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 35.75, "loss": 1.4642361402511597 }, { "id": "identity_320178", "image": "TextVQA/images/00df620fc2790ae5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of turtles are these?" }, { "from": "gpt", "value": "The answer is teenage mutant ninja" } ], "clip_score": 28.875, "loss": 1.9095466136932373 }, { "id": "identity_169745", "image": "IconQA/images/8769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diagonal of the rectangle?" }, { "from": "gpt", "value": "The answer is √65 meters" } ], "clip_score": 32.125, "loss": 3.1893978118896484 }, { "id": "identity_320654", "image": "TextVQA/images/84907cf5f16cf785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this located?" }, { "from": "gpt", "value": "The answer is boston, mass" } ], "clip_score": 19.96875, "loss": 2.4388651847839355 }, { "id": "identity_88163", "image": "GeoQA+/images/3474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOB in the rhombus ABCD?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.59375, "loss": 1.7921242713928223 }, { "id": "identity_71285", "image": "AI2D/images/3510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label 4 represent in the cell diagram?" }, { "from": "gpt", "value": "The answer is Cytoskeleton" } ], "clip_score": 23.90625, "loss": 1.7543631792068481 }, { "id": "identity_70940", "image": "TQA/images/abc_question_images/ocean_waves_19153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter Y indicate in the diagram?" }, { "from": "gpt", "value": "The answer is Y indicates the Crest" } ], "clip_score": 27.46875, "loss": 3.7541606426239014 }, { "id": "identity_254454", "image": "A-OKVQA/images/551194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the family of animals the children are riding and list three characteristics that are typical of this family" }, { "from": "gpt", "value": "The answer is Equidae; odd-toed ungulates, long heads with manes, herbivorous" } ], "clip_score": 21.765625, "loss": 3.2510640621185303 }, { "id": "identity_71696", "image": "AI2D/images/515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary consumers can you identify in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 31.6875, "loss": 4.9529829025268555 }, { "id": "identity_126058", "image": "PMC-VQA/images/PMC3535131_Fig5_176625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What bone is visible in the X-ray image?" }, { "from": "gpt", "value": "The answer is Tibia, fibula, and ankle joint" } ], "clip_score": 32.46875, "loss": 2.3946220874786377 }, { "id": "identity_35682", "image": "CLEVR-Math/images/CLEVR_train_021053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all cyan cubes. How many yellow cylinders are left?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.296875, "loss": 2.3397490978240967 }, { "id": "identity_51499", "image": "DVQA/images/bar_train_00054441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest sales in the 'basis' store?" }, { "from": "gpt", "value": "The answer is attack" } ], "clip_score": 30.328125, "loss": 4.085132122039795 }, { "id": "identity_149423", "image": "DocVQA/images/gqgl0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the foundation mentioned in the document?" }, { "from": "gpt", "value": "The answer is SUGAR RESEARCH FOUNDATION, Inc" } ], "clip_score": 35.34375, "loss": 1.65325129032135 }, { "id": "identity_283253", "image": "IconQA/images/74485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of rectangles present among the given choices?\nChoices:\n(A) 4\n(B) 9\n(C) 1\n(D) 8\n(E) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.625, "loss": 2.9420413970947266 }, { "id": "identity_23641", "image": "IconQA/images/88852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many triangles are there?\nChoices:\n(A) 3\n(B) 1\n(C) 2" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.328125, "loss": 3.251068592071533 }, { "id": "identity_235658", "image": "TabMWP/images/32716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of tickets sold over the 4 days, and what is the average number of tickets sold per day?" }, { "from": "gpt", "value": "The answer is 5,028.75 tickets" } ], "clip_score": 37.25, "loss": 2.8216822147369385 }, { "id": "identity_7715", "image": "FigureQA/images/4643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.031189441680908 }, { "id": "identity_46458", "image": "FigureQA/images/76422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of the Saddle Brown line at the 50 x-axis_label mark?" }, { "from": "gpt", "value": "The answer is 97.8" } ], "clip_score": 27.40625, "loss": 2.568812847137451 }, { "id": "identity_326658", "image": "TextVQA/images/9ff36f284db50ec9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this store?" }, { "from": "gpt", "value": "The answer is penney's" } ], "clip_score": 18.3125, "loss": 2.3421947956085205 }, { "id": "identity_302734", "image": "DVQA/images/bar_train_00119021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of travel in escape larger than the value of pile in shock?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 29.046875, "loss": 3.7713286876678467 }, { "id": "identity_17675", "image": "A-OKVQA/images/298135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the man lagging behind's hairstyle resemble?\nChoices:\n(A) tonsure\n(B) bouffant\n(C) mullet\n(D) mohawk" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.859375, "loss": 4.538710594177246 }, { "id": "identity_119959", "image": "PMC-VQA/images/PMC4994314_Fig1_65493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the calcified nodule in relation to the spine on the DCR image?" }, { "from": "gpt", "value": "The answer is Lateral to the spine" } ], "clip_score": 27.828125, "loss": 2.9561688899993896 }, { "id": "identity_165716", "image": "GeoQA+/images/1020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if AD is the median and E is the midpoint of AD, what is the measure of angle ABE if angle BAC is 60 degrees?" }, { "from": "gpt", "value": "The answer is 75 degrees" } ], "clip_score": 33.65625, "loss": 1.9013603925704956 }, { "id": "identity_329722", "image": "VizWiz/images/VizWiz_train_00012248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is shredded mango" } ], "clip_score": 17.546875, "loss": 1.8826879262924194 }, { "id": "identity_115549", "image": "VQA2.0/images/119760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the rug in the center of the room?" }, { "from": "gpt", "value": "The answer is Floral or botanical pattern" } ], "clip_score": 19.921875, "loss": 3.4950168132781982 }, { "id": "identity_22537", "image": "A-OKVQA/images/105732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being given here?\nChoices:\n(A) lesson\n(B) music audition\n(C) olympic trials\n(D) initiation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.8125, "loss": 4.431588649749756 }, { "id": "identity_251180", "image": "A-OKVQA/images/144562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which player holds the all-time record for stolen bases in Major League Baseball, and how many stolen bases did they achieve?" }, { "from": "gpt", "value": "The answer is Rickey Henderson, 1,406 stolen bases" } ], "clip_score": 19.328125, "loss": 1.3755011558532715 }, { "id": "identity_194723", "image": "A-OKVQA/images/333150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the laptop shown in the image?" }, { "from": "gpt", "value": "The answer is Acer" } ], "clip_score": 22.78125, "loss": 3.193344831466675 }, { "id": "identity_295894", "image": "TabMWP/images/21072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which destination on the hiking trails near Manuel's campground has a higher elevation from sea level, the underground lake in the small cave or the mountain peak?\nChoices:\n(A) underground lake\n(B) mountain peak" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.75, "loss": 4.338350772857666 }, { "id": "identity_90413", "image": "GEOS/images/048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ABCD is a parallelogram and triangle BCD has all equal angles, what is the area of triangle BCD?" }, { "from": "gpt", "value": "The answer is sqrt(3)" } ], "clip_score": 31.71875, "loss": 1.9221417903900146 }, { "id": "identity_200030", "image": "PMC-VQA/images/PMC6943562_molecules-24-04458-f003_94435.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the largest particle in the image?" }, { "from": "gpt", "value": "The answer is Describe the shape of the largest particle" } ], "clip_score": 20.71875, "loss": 2.666696786880493 }, { "id": "identity_297351", "image": "TabMWP/images/18207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of stop signs reported by the town council on each street?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 39.4375, "loss": 3.1718709468841553 }, { "id": "identity_202249", "image": "A-OKVQA/images/422877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of crossing is painted on the road?" }, { "from": "gpt", "value": "The answer is Zebra crossing" } ], "clip_score": 24.484375, "loss": 2.21232271194458 }, { "id": "identity_219434", "image": "VQA2.0/images/473972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teddy bears are hanging in the image?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 30.6875, "loss": 2.6613495349884033 }, { "id": "identity_116368", "image": "TextVQA/images/90916ea379592a95.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the voltage range specified on this electronic component?" }, { "from": "gpt", "value": "The answer is 3V-5.5V" } ], "clip_score": 21.625, "loss": 1.5483146905899048 }, { "id": "identity_149247", "image": "DocVQA/images/zkvm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum number of weeks notice needed for arranging a survey visit?" }, { "from": "gpt", "value": "The answer is 12 weeks" } ], "clip_score": 14.3359375, "loss": 1.7917900085449219 }, { "id": "identity_179347", "image": "CLEVR-Math/images/CLEVR_train_012187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are of a matte texture?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.21875, "loss": 2.679173231124878 }, { "id": "identity_37636", "image": "DVQA/images/bar_train_00145998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the most number of units summed across all the stores?" }, { "from": "gpt", "value": "The answer is crew" } ], "clip_score": 33.59375, "loss": 3.0360419750213623 }, { "id": "identity_161745", "image": "GeoQA+/images/2781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with angle B being 90 degrees, AB=12, and BC=5, what is the measure of angle A in degrees?" }, { "from": "gpt", "value": "The answer is 22.62" } ], "clip_score": 30.125, "loss": 2.742793083190918 }, { "id": "identity_86335", "image": "Geometry3K/images/2068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment YX?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 29.171875, "loss": 2.6182496547698975 }, { "id": "identity_231118", "image": "FigureQA/images/44986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the total does Light Slate represent, and how much more percentage does Bubblegum have compared to Light Slate?" }, { "from": "gpt", "value": "The answer is Light Slate represents 33.33%, and Bubblegum has 33.34% more" } ], "clip_score": 43.0, "loss": 1.2758079767227173 }, { "id": "identity_98044", "image": "TabMWP/images/13212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stops does the bus make between 10 A.M. and 12 P.M.?" }, { "from": "gpt", "value": "The answer is 5 stops" } ], "clip_score": 22.5625, "loss": 2.3765132427215576 }, { "id": "identity_133256", "image": "A-OKVQA/images/389974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of meat is being prepared on the cutting board?" }, { "from": "gpt", "value": "The answer is Beef or lamb" } ], "clip_score": 23.421875, "loss": 3.307297945022583 }, { "id": "identity_139193", "image": "DVQA/images/bar_train_00081664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any categories where 'chest' has a value less than 'pure'?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.765625, "loss": 3.101102352142334 }, { "id": "identity_169980", "image": "TabMWP/images/31338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the most expensive and least expensive book?" }, { "from": "gpt", "value": "The answer is $6" } ], "clip_score": 27.171875, "loss": 2.4596354961395264 }, { "id": "identity_142125", "image": "FigureQA/images/12362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the largest segment in the pie chart?" }, { "from": "gpt", "value": "The answer is Light Green" } ], "clip_score": 38.8125, "loss": 2.6376469135284424 }, { "id": "identity_99337", "image": "TabMWP/images/23995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Ernest has a $50 bill, how much change will he receive after buying a book about Australia and a book about snakes?" }, { "from": "gpt", "value": "The answer is $26.00" } ], "clip_score": 30.03125, "loss": 1.7543554306030273 }, { "id": "identity_142801", "image": "DocVQA/images/fsmp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date stamped on the image?" }, { "from": "gpt", "value": "The answer is November 26, 1975" } ], "clip_score": 19.875, "loss": 1.3803186416625977 }, { "id": "identity_13489", "image": "IconQA/images/74707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a bear. Which picture is fifth?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.53125, "loss": 4.237834930419922 }, { "id": "identity_139514", "image": "DVQA/images/bar_train_00089363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups have a \"danger\" value of 5 or more?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.3125, "loss": 2.162745475769043 }, { "id": "identity_311230", "image": "FigureQA/images/75523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.578125, "loss": 4.589735507965088 }, { "id": "identity_317113", "image": "ScienceQA/images/18157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these objects attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.734375, "loss": 3.6065306663513184 }, { "id": "identity_53930", "image": "DocVQA/images/zpcg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sender requesting to be reproduced before their departure?" }, { "from": "gpt", "value": "The answer is Introductory remarks" } ], "clip_score": 18.609375, "loss": 3.5403971672058105 }, { "id": "identity_26852", "image": "TextVQA/images/9d0d88bbbbf4572e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name on the orange box on the bottom right?" }, { "from": "gpt", "value": "The answer is nescafe" } ], "clip_score": 19.828125, "loss": 2.457016944885254 }, { "id": "identity_20437", "image": "GeoQA+/images/3563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,M、F是⊙A上的两点,AM的垂直平分线与⊙A交于B、C两点,与线段AF交于D点,连接DM,若∠MBF=20°,则∠DMF=()\nChoices:\n(A) 30°\n(B) 29°\n(C) 28°\n(D) 20°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.3125, "loss": 2.506927013397217 }, { "id": "identity_330869", "image": "TabMWP/images/11984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For a project, Manuel counted the number of classrooms in each school in the city. How many schools have at least this many classrooms?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 29.140625, "loss": 2.0251948833465576 }, { "id": "identity_276278", "image": "Super-CLEVR/images/superCLEVR_new_000205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the small red utility bikes on the right side of the small red metal utility bike equal the number of big gray metal cars?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.21875, "loss": 4.833099365234375 }, { "id": "identity_48932", "image": "FigureQA/images/29544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of Light Gold in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 40%" } ], "clip_score": 36.21875, "loss": 2.252446413040161 }, { "id": "identity_52435", "image": "DocVQA/images/ykdv0228_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the transaction?" }, { "from": "gpt", "value": "The answer is 19-75" } ], "clip_score": 20.46875, "loss": 3.1607894897460938 }, { "id": "identity_260294", "image": "ScienceQA/images/5514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property do these three objects have in common, and which one of these objects is the least dense?" }, { "from": "gpt", "value": "The answer is feather" } ], "clip_score": 19.34375, "loss": 4.144716262817383 }, { "id": "identity_176274", "image": "TabMWP/images/24952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of boxes sold over the past 7 days?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.78125, "loss": 2.6862103939056396 }, { "id": "identity_86570", "image": "Geometry3K/images/697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If KLMN is a parallelogram, find the length of diagonal NJ" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 36.125, "loss": 2.4383738040924072 }, { "id": "identity_59190", "image": "FigureQA/images/19558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of the Coral section in the pie chart?" }, { "from": "gpt", "value": "The answer is 0.56 or 56%" } ], "clip_score": 31.515625, "loss": 1.4711599349975586 }, { "id": "identity_147164", "image": "MapQA/images/map_2302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with a number of driers in the range of 565195-598804?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.234375, "loss": 2.59540057182312 }, { "id": "identity_288129", "image": "ScienceQA/images/1560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these four objects share among the options of being slippery, smooth, or hard?\nChoices:\n(A) slippery\n(B) smooth\n(C) hard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.421875, "loss": 3.9834046363830566 }, { "id": "identity_280444", "image": "TextVQA/images/77381c9a2c6a84a9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this monitor an LED monitor?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 30.484375, "loss": 3.5802245140075684 }, { "id": "identity_7937", "image": "ScienceQA/images/7271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which statement describes the Kaeng Krachan National Park ecosystem?\nChoices:\n(A) It has many different types of organisms.\n(B) It has mostly small plants." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.78125, "loss": 3.9668946266174316 }, { "id": "identity_5724", "image": "Geometry3K/images/853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Circle L has a radius of 32 centimeters. L H \\perp G J, and G J = 40 centimeters. Find L K.\nChoices:\n(A) 20\n(B) \\sqrt { 624 }\n(C) 40\n(D) 624" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.078125, "loss": 2.3036656379699707 }, { "id": "identity_172510", "image": "TabMWP/images/305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in cost between taking a bicycle on the Seattle-Bremerton ferry and the Southport-Fort Fisher ferry?" }, { "from": "gpt", "value": "The answer is $6" } ], "clip_score": 40.71875, "loss": 2.6996829509735107 }, { "id": "identity_163270", "image": "GeoQA+/images/1018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In right triangle ABC, with ∠C=90°, side a=1, and side c=4, what is the length of side b?" }, { "from": "gpt", "value": "The answer is b = √15" } ], "clip_score": 31.84375, "loss": 1.8989852666854858 }, { "id": "identity_178920", "image": "IconQA/images/29964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If this clock gains 5 minutes every hour, what will the time be after 2 hours?" }, { "from": "gpt", "value": "The answer is 8:10" } ], "clip_score": 30.8125, "loss": 1.6908189058303833 }, { "id": "identity_117619", "image": "PMC-VQA/images/PMC8342271_Fig3_27574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technology was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 30.25, "loss": 2.927764654159546 }, { "id": "identity_231532", "image": "IconQA/images/14651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each snowman needs 2 carrot noses, are there enough carrot noses for every snowman?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 37.375, "loss": 3.1380295753479004 }, { "id": "identity_113961", "image": "TextVQA/images/02d94afde962b96b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year was the establishment on the right founded?" }, { "from": "gpt", "value": "The answer is 1999" } ], "clip_score": 15.453125, "loss": 1.7597813606262207 }, { "id": "identity_327658", "image": "IconQA/images/52680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there enough for each snail?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.453125, "loss": 3.665306806564331 }, { "id": "identity_130760", "image": "Super-CLEVR/images/superCLEVR_new_000434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 18.125, "loss": 4.873415946960449 }, { "id": "identity_218733", "image": "VQA2.0/images/28103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface is the tennis court?" }, { "from": "gpt", "value": "The answer is Clay" } ], "clip_score": 27.4375, "loss": 3.431833267211914 }, { "id": "identity_85539", "image": "GeoQA+/images/5373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AED in the figure?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 29.921875, "loss": 1.630284309387207 }, { "id": "identity_320513", "image": "A-OKVQA/images/206102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being recorded?\nChoices:\n(A) music\n(B) movie\n(C) podcast\n(D) youtube video" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.9609375, "loss": 4.053470134735107 }, { "id": "identity_190343", "image": "VQA2.0/images/27216.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tacos are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.671875, "loss": 4.350005149841309 }, { "id": "identity_271025", "image": "VQA-RAD/images/synpic27646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the description of the distribution shown in this image?" }, { "from": "gpt", "value": "The answer is Micronodular" } ], "clip_score": 20.9375, "loss": 4.7985100746154785 }, { "id": "identity_208331", "image": "A-OKVQA/images/332863.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the hull of the ship in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.203125, "loss": 2.9426510334014893 }, { "id": "identity_21069", "image": "VizWiz/images/VizWiz_train_00009516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the name of this medicine?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 12.1953125, "loss": 3.7281875610351562 }, { "id": "identity_312477", "image": "ScienceQA/images/2967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the arrows, which of the following organisms is a consumer?\nChoices:\n(A) sea cucumber\n(B) kelp" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.078125, "loss": 3.5566048622131348 }, { "id": "identity_137306", "image": "ChartQA/images/two_col_41634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage represented by suppliers other than NKT Group and Nexans?" }, { "from": "gpt", "value": "The answer is 34%" } ], "clip_score": 29.8125, "loss": 1.8394814729690552 }, { "id": "identity_299137", "image": "PMC-VQA/images/PMC4114190_F3_309468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the red and blue colors in this image? \nChoices:\n(A) High and low frequency preference regions, respectively.\n(B) High and low temperature preference regions, respectively.\n(C) High and low altitude preference regions, respectively.\n(D) High and low humidity preference regions, respectively." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.890625, "loss": 3.8855957984924316 }, { "id": "identity_256210", "image": "ScienceQA/images/17118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total increase in temperature for both refrigerators combined" }, { "from": "gpt", "value": "The answer is 9.6°C" } ], "clip_score": 30.609375, "loss": 2.0900070667266846 }, { "id": "identity_22343", "image": "A-OKVQA/images/578131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the horse shown here taking part in?\nChoices:\n(A) steeple chase\n(B) racing\n(C) barrel racing\n(D) roping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.453125, "loss": 4.219966888427734 }, { "id": "identity_292608", "image": "VQA2.0/images/336291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this location have an urban setting?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.3515625, "loss": 3.883162021636963 }, { "id": "identity_160963", "image": "GeoQA+/images/2846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠ADB=100° and O is the center of the larger circle, what is the measure of ∠CDB?" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 37.34375, "loss": 1.7696614265441895 }, { "id": "identity_244009", "image": "TextVQA/images/dc101e3f69a0a451.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the numbers written in green on the signs visible in the image?" }, { "from": "gpt", "value": "The answer is 07" } ], "clip_score": 14.5625, "loss": 2.87367582321167 }, { "id": "identity_105077", "image": "Super-CLEVR/images/superCLEVR_new_001957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 21.578125, "loss": 5.135861873626709 }, { "id": "identity_269927", "image": "TabMWP/images/27750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the table provided, is there a shortage or surplus at a price of $660? Choose between:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.171875, "loss": 3.6312365531921387 }, { "id": "identity_59030", "image": "DocVQA/images/fkvd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is representing the Committee on Maternal Nutrition?" }, { "from": "gpt", "value": "The answer is Robert E. Shank, M.D" } ], "clip_score": 25.09375, "loss": 2.8849523067474365 }, { "id": "identity_35443", "image": "PMC-VQA/images/PMC7073084_ijms-21-01383-f003_134672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the scale bar indicate in the image? \nChoices:\n(A) Bacterial size \n(B) Human size \n(C) Amount of protein \n(D) Amount of starch granules " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.09375, "loss": 4.28054141998291 }, { "id": "identity_71101", "image": "ScienceQA/images/18831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of marine organism is shown in the image?" }, { "from": "gpt", "value": "The answer is Sea anemone" } ], "clip_score": 27.953125, "loss": 2.7089295387268066 }, { "id": "identity_160227", "image": "UniGeo/images/1612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if AD=5, BD=10, and DE is parallel to BC, what is the length of segment EC?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.53125, "loss": 2.1574108600616455 }, { "id": "identity_280497", "image": "A-OKVQA/images/194421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which skate maneuver is the boy in white executing? Is it a flip trick, nose slide, grind, or grab?\nChoices:\n(A) flip trick\n(B) nose slide\n(C) grind\n(D) grab" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.625, "loss": 4.13825798034668 }, { "id": "identity_87022", "image": "GeoQA+/images/2159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line segment AB is translated 2cm to form line segment CD, and the length of AB is 3cm, what is the length of line segment CD?" }, { "from": "gpt", "value": "The answer is 3cm" } ], "clip_score": 30.03125, "loss": 2.0932810306549072 }, { "id": "identity_151020", "image": "ScienceQA/images/5273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What poles of the magnets are facing each other in both pairs?" }, { "from": "gpt", "value": "The answer is South and North poles" } ], "clip_score": 23.953125, "loss": 2.2597341537475586 }, { "id": "identity_59001", "image": "ChartQA/images/6080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in opinion between the \"Dem/Lean Dem\" and \"Rep/Lean Rep\" groups regarding NAFTA being \"Bad for U.S.\"?" }, { "from": "gpt", "value": "The answer is 36%" } ], "clip_score": 30.359375, "loss": 2.629028558731079 }, { "id": "identity_251509", "image": "A-OKVQA/images/127296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing in the group near the motorcycle, and what is the profession of the person closest to the motorcycle?" }, { "from": "gpt", "value": "The answer is 8 people, cop" } ], "clip_score": 29.734375, "loss": 3.435856819152832 }, { "id": "identity_169141", "image": "TabMWP/images/1889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students scored less than 20 points?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.859375, "loss": 2.308314800262451 }, { "id": "identity_206505", "image": "TextVQA/images/495b7bb8e84c2f46.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What musical is being advertised on the billboard above the McDonald's?" }, { "from": "gpt", "value": "The answer is Chicago" } ], "clip_score": 31.03125, "loss": 3.332368850708008 }, { "id": "identity_245151", "image": "VQA2.0/images/336991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many orange objects are visible in the image, including the woman's shirt?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 15.3046875, "loss": 3.692713975906372 }, { "id": "identity_47757", "image": "ChartQA/images/18051975000655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of acute care beds for Slovenia in 2009?" }, { "from": "gpt", "value": "The answer is 6,000" } ], "clip_score": 25.75, "loss": 1.915114164352417 }, { "id": "identity_166926", "image": "TabMWP/images/34468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of building heights?" }, { "from": "gpt", "value": "The answer is 41 meters" } ], "clip_score": 28.265625, "loss": 3.2534475326538086 }, { "id": "identity_181730", "image": "CLEVR-Math/images/CLEVR_train_061475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more spheres or cylinders in the image?" }, { "from": "gpt", "value": "The answer is Equal" } ], "clip_score": 28.375, "loss": 3.7007923126220703 }, { "id": "identity_245685", "image": "TabMWP/images/30640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of vehicles sold by the dealerships?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 30.375, "loss": 2.560880184173584 }, { "id": "identity_63766", "image": "ScienceQA/images/2689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance between the magnets in Pair 2?" }, { "from": "gpt", "value": "The answer is 3 inches" } ], "clip_score": 19.078125, "loss": 1.9023782014846802 }, { "id": "identity_192400", "image": "PMC-VQA/images/PMC7873062_Fig1_364493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are labeled with \"Peduncles\"?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.796875, "loss": 4.325273513793945 }, { "id": "identity_268736", "image": "IconQA/images/56199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a crab, which picture corresponds to the seventh position in the sequence of images provided? \nChoices:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.59375, "loss": 3.7772045135498047 }, { "id": "identity_234808", "image": "Super-CLEVR/images/superCLEVR_new_000236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are the same color as the scooter?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.34375, "loss": 2.3800761699676514 }, { "id": "identity_231580", "image": "VQA2.0/images/104452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people and surfboards combined in the image?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.203125, "loss": 2.420280933380127 }, { "id": "identity_46876", "image": "MapQA/images/map_10585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have data not available (N/A)?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.5625, "loss": 2.4242401123046875 }, { "id": "identity_298752", "image": "Geometry3K/images/783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In circle G, if angle AGB measures 30 degrees and CG is perpendicular to GD, what is the measure of arc BCD?\nChoices:\n(A) 30\n(B) 45\n(C) 120\n(D) 150" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.4375, "loss": 2.4892287254333496 }, { "id": "identity_278578", "image": "AI2D/images/2924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the bony anterior portion of the palate known as? \nChoices:\n(A) uvula\n(B) soft palate\n(C) palatine tonsil\n(D) hard palate" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.34375, "loss": 3.9200692176818848 }, { "id": "identity_316682", "image": "Geometry3K/images/72.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 4.\nChoices:\n(A) 28\n(B) 38\n(C) 52\n(D) 62" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.953125, "loss": 3.617997646331787 }, { "id": "identity_39961", "image": "DVQA/images/bar_train_00108311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest percentage of people preferring 'poem'?" }, { "from": "gpt", "value": "The answer is Nature" } ], "clip_score": 28.234375, "loss": 2.772599458694458 }, { "id": "identity_117770", "image": "VQA2.0/images/507889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is visible on the road to the left?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 12.5234375, "loss": 3.324596405029297 }, { "id": "identity_236329", "image": "CLEVR-Math/images/CLEVR_train_050669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in total, and how many of them are spheres?" }, { "from": "gpt", "value": "The answer is 5 objects in total, 3 of which are spheres" } ], "clip_score": 26.65625, "loss": 1.4336649179458618 }, { "id": "identity_64026", "image": "ScienceQA/images/20730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique items are listed between Patty and Tisha's wants combined?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 27.359375, "loss": 1.9615507125854492 }, { "id": "identity_270319", "image": "VQA-AS/images/6019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the location of the curtains?" }, { "from": "gpt", "value": "The answer is windows" } ], "clip_score": 25.34375, "loss": 3.542254686355591 }, { "id": "identity_19754", "image": "TextVQA/images/860d0c6bfe1c53cb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of this chart?" }, { "from": "gpt", "value": "The answer is the road to database technology" } ], "clip_score": 13.46875, "loss": 2.267427682876587 }, { "id": "identity_172092", "image": "IconQA/images/74107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the objects arranged in a single row or multiple rows?" }, { "from": "gpt", "value": "The answer is Single row" } ], "clip_score": 26.59375, "loss": 2.864896059036255 }, { "id": "identity_270838", "image": "VizWiz/images/VizWiz_train_00000117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what that is?" }, { "from": "gpt", "value": "The answer is mailbox" } ], "clip_score": 27.53125, "loss": 2.91117000579834 }, { "id": "identity_7493", "image": "FigureQA/images/10354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.234375, "loss": 3.7581875324249268 }, { "id": "identity_43102", "image": "DVQA/images/bar_train_00090466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of heart at brass?" }, { "from": "gpt", "value": "The answer is -5" } ], "clip_score": 24.890625, "loss": 2.8142786026000977 }, { "id": "identity_221258", "image": "A-OKVQA/images/245497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Nighttime" } ], "clip_score": 11.7890625, "loss": 3.04181170463562 }, { "id": "identity_133232", "image": "Super-CLEVR/images/superCLEVR_new_001930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the bus?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.671875, "loss": 2.230241298675537 }, { "id": "identity_129431", "image": "PMC-VQA/images/PMC5381228_f4_149539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you estimate the thickness of the fibers in the image?" }, { "from": "gpt", "value": "The answer is Less than 1 µm, approximately 0.5 µm" } ], "clip_score": 29.375, "loss": 2.0893399715423584 }, { "id": "identity_155733", "image": "AI2D/images/995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure that resembles a flower in the image?" }, { "from": "gpt", "value": "The answer is Peltate scale" } ], "clip_score": 27.40625, "loss": 2.5518510341644287 }, { "id": "identity_48491", "image": "DocVQA/images/xzgk0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the internet service provided in the offer?" }, { "from": "gpt", "value": "The answer is 1-year" } ], "clip_score": 19.875, "loss": 2.3864474296569824 }, { "id": "identity_203446", "image": "TextVQA/images/018c95ebb9fece08.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What flavor is the Benzidrol medication?" }, { "from": "gpt", "value": "The answer is Mint" } ], "clip_score": 14.0234375, "loss": 2.756498336791992 }, { "id": "identity_66403", "image": "AI2D/images/829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are depicted in the life cycle of this insect?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.84375, "loss": 2.385350465774536 }, { "id": "identity_11827", "image": "DVQA/images/bar_train_00064595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units of items tail and shift were sold?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 30.84375, "loss": 2.508117914199829 }, { "id": "identity_275069", "image": "TabMWP/images/35886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each plate contains 10 pumpkin bars, how many pumpkin bars are there in total on 5 plates?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 41.09375, "loss": 1.744635820388794 }, { "id": "identity_216458", "image": "VQA2.0/images/465005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the bench in relation to the tree?" }, { "from": "gpt", "value": "The answer is In front of" } ], "clip_score": 19.5625, "loss": 3.207761287689209 }, { "id": "identity_249455", "image": "IconQA/images/57768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fins are visible on the whale?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.703125, "loss": 2.073204755783081 }, { "id": "identity_159227", "image": "GeoQA+/images/1961.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of CD in the quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 35.59375, "loss": 2.444215774536133 }, { "id": "identity_180564", "image": "CLEVR-Math/images/CLEVR_train_017118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the objects are metallic?" }, { "from": "gpt", "value": "The answer is 2/6 or 1/3" } ], "clip_score": 23.84375, "loss": 1.2059279680252075 }, { "id": "identity_326227", "image": "IconQA/images/9234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Complete the sentence to describe the model. The model shows 6 dots split into 2 equal groups. Each group contains (_) dots." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.109375, "loss": 1.9695848226547241 }, { "id": "identity_30459", "image": "PMC-VQA/images/PMC8046147_jbm410472-fig-0005_417800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being displaced by the ossification on the plantar aspect of the foot?\nChoices:\n(A) Bones \n(B) Tendons \n(C) Ligaments \n(D) Muscles " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.71875, "loss": 4.160791873931885 }, { "id": "identity_334609", "image": "FigureQA/images/91303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the Green curve the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.21875, "loss": 4.211864471435547 }, { "id": "identity_177007", "image": "TabMWP/images/5911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more students attend Oceanside Elementary compared to Eastside Elementary?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.59375, "loss": 2.7313201427459717 }, { "id": "identity_197107", "image": "Super-CLEVR/images/superCLEVR_new_000200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.609375, "loss": 2.387709379196167 }, { "id": "identity_4576", "image": "VizWiz/images/VizWiz_train_00005097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is landline telephone" } ], "clip_score": 30.953125, "loss": 2.156027317047119 }, { "id": "identity_66972", "image": "ScienceQA/images/12472.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are depicted in this food chain?" }, { "from": "gpt", "value": "The answer is There are three trophic levels" } ], "clip_score": 27.078125, "loss": 2.5064191818237305 }, { "id": "identity_77752", "image": "AI2D/images/2673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What month is represented by the Earth's position directly opposite to the Sun from June?" }, { "from": "gpt", "value": "The answer is January" } ], "clip_score": 34.1875, "loss": 3.3124020099639893 }, { "id": "identity_33835", "image": "TabMWP/images/7311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Dylan kept a driving log to see how many miles he drove each day. What is the median of the numbers?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 37.1875, "loss": 2.5832626819610596 }, { "id": "identity_52414", "image": "DocVQA/images/yznh0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum camping fee mentioned in the document?" }, { "from": "gpt", "value": "The answer is 4.00" } ], "clip_score": 18.796875, "loss": 2.2967495918273926 }, { "id": "identity_95686", "image": "TabMWP/images/28721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more expensive is the key lime pie compared to the cherry pie?" }, { "from": "gpt", "value": "The answer is $3.53" } ], "clip_score": 27.484375, "loss": 2.002232074737549 }, { "id": "identity_320021", "image": "PMC-VQA/images/PMC5548361_F1_201014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the vasculature visible in the tectum?\nChoices:\n(A) The two dorsal sections\n(B) The two ventral sections\n(C) The two middle sections\n(D) All the sections" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.265625, "loss": 4.132562637329102 }, { "id": "identity_187904", "image": "PMC-VQA/images/PMC7396416_Fig7_224247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any surgical instruments visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.609375, "loss": 3.167966604232788 }, { "id": "identity_206577", "image": "PMC-VQA/images/PMC7560692_fig0010_274447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fluorescent colors are visible in the image?" }, { "from": "gpt", "value": "The answer is Green and red fluorescence" } ], "clip_score": 26.3125, "loss": 2.397786855697632 }, { "id": "identity_123427", "image": "VQA-AS/images/16118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.109375, "loss": 2.6162993907928467 }, { "id": "identity_108180", "image": "TextVQA/images/fcffc83372fe4014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color scheme of the team's uniform?" }, { "from": "gpt", "value": "The answer is White and black" } ], "clip_score": 21.15625, "loss": 3.20874285697937 }, { "id": "identity_103568", "image": "TextVQA/images/01ab502812393b4f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many names are listed above the title on the movie poster to the left?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.609375, "loss": 4.557791709899902 }, { "id": "identity_176835", "image": "TabMWP/images/20010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of yellow blocks to buckets?" }, { "from": "gpt", "value": "The answer is 5:1" } ], "clip_score": 32.53125, "loss": 1.4785945415496826 }, { "id": "identity_122461", "image": "VQA-RAD/images/synpic18319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of a pneumothorax on this chest X-ray?" }, { "from": "gpt", "value": "The answer is No, there are no signs of pneumothorax" } ], "clip_score": 34.6875, "loss": 1.4979417324066162 }, { "id": "identity_289886", "image": "TextVQA/images/03ee1c3f5cdc8110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this airline?" }, { "from": "gpt", "value": "The answer is kendell" } ], "clip_score": 24.4375, "loss": 3.3927886486053467 }, { "id": "identity_210972", "image": "Super-CLEVR/images/superCLEVR_new_000278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the object in the center of the image designed for transportation or leisure?" }, { "from": "gpt", "value": "The answer is Both transportation and leisure" } ], "clip_score": 22.1875, "loss": 2.1777899265289307 }, { "id": "identity_321627", "image": "PMC-VQA/images/PMC6707203_Fig2_27521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrowhead indicate in this image?\nChoices:\n(A) Cell nucleus \n(B) Labeled skin cell \n(C) Mitochondria \n(D) Cell membrane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.203125, "loss": 4.549906253814697 }, { "id": "identity_93261", "image": "IconQA/images/99608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of rockets in the image?" }, { "from": "gpt", "value": "The answer is 2 rockets" } ], "clip_score": 28.15625, "loss": 2.372067451477051 }, { "id": "identity_48623", "image": "DVQA/images/bar_train_00160892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer 'writer' across all categories?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 28.046875, "loss": 2.3698930740356445 }, { "id": "identity_232013", "image": "DVQA/images/bar_train_00051904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of all the bars in the \"aid\" category?" }, { "from": "gpt", "value": "The answer is 180" } ], "clip_score": 18.1875, "loss": 1.9956364631652832 }, { "id": "identity_266488", "image": "A-OKVQA/images/307438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color light do the cars positioned perpendicular to the camera display?\nChoices:\n(A) green\n(B) blue\n(C) red\n(D) yellow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.46875, "loss": 3.8524391651153564 }, { "id": "identity_137585", "image": "DocVQA/images/tyjc0003_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the cigarette mentioned in the document?" }, { "from": "gpt", "value": "The answer is Salem Pianissimo Slim Lt. 100 CP Export (Japan)" } ], "clip_score": 21.65625, "loss": 3.8076364994049072 }, { "id": "identity_188495", "image": "TextVQA/images/04a575f307c7b89a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wine bottles are displayed in total?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 25.265625, "loss": 1.7956037521362305 }, { "id": "identity_332382", "image": "PMC-VQA/images/PMC6365311_Fig3_432950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging feature that differentiates this mass from a meningioma?\nChoices:\n(A) CSF cleft\n(B) Dural tail\n(C) Wide dural base\n(D) Poorly outlined cortical interface" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.46875, "loss": 4.6491618156433105 }, { "id": "identity_13761", "image": "FigureQA/images/73304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Magenta less than Dark Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.796875, "loss": 3.578829288482666 }, { "id": "identity_329637", "image": "TabMWP/images/34203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many readers have at least 3 winter hats according to the poll?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 38.5625, "loss": 2.43654465675354 }, { "id": "identity_333299", "image": "TabMWP/images/25154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more did one restaurant make from supreme pizzas compared to another?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 35.5625, "loss": 2.835918426513672 }, { "id": "identity_326332", "image": "TextVQA/images/78a5a7902e284852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current time?" }, { "from": "gpt", "value": "The answer is 5:33:05" } ], "clip_score": 10.5078125, "loss": 1.974564552307129 }, { "id": "identity_333279", "image": "MapQA/images/map_9860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have the highest value in the West?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.78125, "loss": 2.6729283332824707 }, { "id": "identity_274227", "image": "IconQA/images/25453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Estimate the number of marbles. Which of the following choices is the closest estimate:\n(A) approximately 40\n(B) approximately 70" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.078125, "loss": 3.2420406341552734 }, { "id": "identity_10631", "image": "TextVQA/images/e0c0747ef4f0bd9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what time is it at the moment during the show?" }, { "from": "gpt", "value": "The answer is 12:59" } ], "clip_score": 19.34375, "loss": 1.773440957069397 }, { "id": "identity_203325", "image": "PMC-VQA/images/PMC4726387_f4_465086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What magnification is likely used in image A based on the scale bar?" }, { "from": "gpt", "value": "The answer is High magnification (likely 400x or 1000x)" } ], "clip_score": 26.296875, "loss": 1.5410194396972656 }, { "id": "identity_100877", "image": "TabMWP/images/1959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a tourist takes the 9:00 A.M. bus from the old building, at what time will they reach the skyscraper?" }, { "from": "gpt", "value": "The answer is 2:00 P.M" } ], "clip_score": 27.625, "loss": 1.3257012367248535 }, { "id": "identity_261672", "image": "MapQA/images/map_11829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Kentucky lower than that of Arkansas?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.1875, "loss": 3.6835155487060547 }, { "id": "identity_257620", "image": "ScienceQA/images/13178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum number of trades needed for Elise and Terrell to each get at least one item they want, and what are those trades?" }, { "from": "gpt", "value": "The answer is 1 trade" } ], "clip_score": 20.703125, "loss": 3.1410624980926514 }, { "id": "identity_206321", "image": "TextVQA/images/0e3dbba8d9235257.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a body of water visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 12.8828125, "loss": 2.8649373054504395 }, { "id": "identity_272249", "image": "DocVQA/images/qyfm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the document?" }, { "from": "gpt", "value": "The answer is balancing the benefits and risks of the application of science to agriculture and food production" } ], "clip_score": 21.984375, "loss": 1.1578961610794067 }, { "id": "identity_281176", "image": "DocVQA/images/lybx0223_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is being referred to?" }, { "from": "gpt", "value": "The answer is May 29, 2012" } ], "clip_score": 16.828125, "loss": 1.6419541835784912 }, { "id": "identity_87616", "image": "Geometry3K/images/2048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle represented by (1/4)x°?" }, { "from": "gpt", "value": "The answer is 1°" } ], "clip_score": 32.59375, "loss": 2.4814984798431396 }, { "id": "identity_108172", "image": "TextVQA/images/da0c5d76e6fe2b1f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the pens attached to the clipboards?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 11.9453125, "loss": 3.7229621410369873 }, { "id": "identity_211441", "image": "A-OKVQA/images/345861.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are visible on the desk?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.640625, "loss": 2.0937659740448 }, { "id": "identity_308675", "image": "IconQA/images/62909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a crab. What is the second image?\nChoices:\n(A) crab\n(B) bucket\n(C) boat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.65625, "loss": 3.9421191215515137 }, { "id": "identity_333860", "image": "GeoQA+/images/4208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在⊙O中,弦AB、CD所对的圆心角分别是∠AOB、∠COD,若这两个角互补,且AB=2,CD=4,则⊙O的半径是()\n\nChoices:\n(A) √{3}\n(B) 2\n(C) √{5}\n(D) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.921875, "loss": 2.5665225982666016 }, { "id": "identity_281364", "image": "ScienceQA/images/10870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is best suited for Devin's experiment to answer:\n(A) Can pennies hold more drops of pure water or water mixed with hand soap?\n(B) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.625, "loss": 3.9708118438720703 }, { "id": "identity_42656", "image": "FigureQA/images/247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the segment that is opposite to Firebrick?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 33.34375, "loss": 2.7162466049194336 }, { "id": "identity_263009", "image": "AI2D/images/2569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram provided, during which dates does the Autumnal Equinox take place?\nChoices:\n(A) June 21-22\n(B) March 21-22\n(C) December 21-22\n(D) September 22-23" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.609375, "loss": 3.4141643047332764 }, { "id": "identity_11229", "image": "TQA/images/question_images/lewis_dot_diagrams_9132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many covalent bonds does Bromine form?\nChoices:\n(A) 1\n(B) 3\n(C) 2\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.890625, "loss": 2.8238182067871094 }, { "id": "identity_307318", "image": "ScienceQA/images/3235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, heat moved from () to ().\nChoices:\n(A) the surroundings . . . each refrigerator\n(B) each refrigerator . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.90625, "loss": 3.955531358718872 }, { "id": "identity_171532", "image": "TabMWP/images/24416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy three measuring tapes, how much will it cost?" }, { "from": "gpt", "value": "The answer is $36.00" } ], "clip_score": 29.484375, "loss": 1.7384132146835327 }, { "id": "identity_246887", "image": "IconQA/images/21394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 30.109375, "loss": 4.4518046379089355 }, { "id": "identity_61766", "image": "DocVQA/images/qfjg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the person who wrote the letter?" }, { "from": "gpt", "value": "The answer is Production Manager" } ], "clip_score": 20.765625, "loss": 4.135729789733887 }, { "id": "identity_289987", "image": "DocVQA/images/jqxf0227_26.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which subject is discussed on the given page?" }, { "from": "gpt", "value": "The answer is Chronic illness Degenerative Diseases" } ], "clip_score": 23.546875, "loss": 3.1782658100128174 }, { "id": "identity_266816", "image": "TabMWP/images/1169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the barn on the farmer's farm, after examining each building to identify lights that need repair, how many more broken lights are there compared to working lights?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 43.09375, "loss": 2.4198710918426514 }, { "id": "identity_279253", "image": "Super-CLEVR/images/superCLEVR_new_000376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shiny motorbike on the right the same size as the metallic motorbike on the left of the large green bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 34.65625, "loss": 4.940618515014648 }, { "id": "identity_136511", "image": "FigureQA/images/3516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the color Dark Khaki represent a value less than 50?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.46875, "loss": 3.207449436187744 }, { "id": "identity_131811", "image": "PMC-VQA/images/PMC6479307_molecules-24-01192-f009_470003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the cells in a healthy or degenerative state?" }, { "from": "gpt", "value": "The answer is Healthy" } ], "clip_score": 23.234375, "loss": 2.6200954914093018 }, { "id": "identity_80860", "image": "GeoQA+/images/2237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC内接于⊙O,AD是⊙O的直径,∠ABC=35°,则∠BAD的度数是()" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 27.59375, "loss": 1.7072875499725342 }, { "id": "identity_259300", "image": "IconQA/images/24501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each paper clip is 2 cm long, what is the total length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 2 cm" } ], "clip_score": 29.9375, "loss": 2.0081324577331543 }, { "id": "identity_43596", "image": "ChartQA/images/7533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest percentage of officers thinking local police should take an active role?" }, { "from": "gpt", "value": "The answer is Whites (59%)" } ], "clip_score": 37.09375, "loss": 2.2410130500793457 }, { "id": "identity_162158", "image": "GeoQA+/images/3873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ∠BAC = 40°, what is the measure of ∠BOC?" }, { "from": "gpt", "value": "The answer is 100°" } ], "clip_score": 33.59375, "loss": 1.7409082651138306 }, { "id": "identity_39147", "image": "DVQA/images/bar_train_00150486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are represented in the graph?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.8125, "loss": 2.8158843517303467 }, { "id": "identity_268485", "image": "DocVQA/images/ppxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which number is referenced in the document?" }, { "from": "gpt", "value": "The answer is 2-74" } ], "clip_score": 18.125, "loss": 2.093477964401245 }, { "id": "identity_208321", "image": "PMC-VQA/images/PMC6603090_F3_2548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence is shown in the image?" }, { "from": "gpt", "value": "The answer is T2-weighted MRI sequence" } ], "clip_score": 27.828125, "loss": 1.754510760307312 }, { "id": "identity_43308", "image": "FigureQA/images/80629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of Orchid at xaxis_label 60?" }, { "from": "gpt", "value": "The answer is Approximately 84.6" } ], "clip_score": 25.390625, "loss": 2.641326665878296 }, { "id": "identity_171664", "image": "CLEVR-Math/images/CLEVR_train_037435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are both small and blue?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.828125, "loss": 2.6273930072784424 }, { "id": "identity_256179", "image": "TabMWP/images/4154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each room has 10 people, how many people are in 8 rooms?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 29.96875, "loss": 1.661952018737793 }, { "id": "identity_66076", "image": "AI2D/images/2670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the Earth during the June Solstice?" }, { "from": "gpt", "value": "The answer is The Earth is at the top position, North Pole towards the Sun" } ], "clip_score": 32.0625, "loss": 2.6874916553497314 }, { "id": "identity_140072", "image": "DocVQA/images/lkhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned on the letter?" }, { "from": "gpt", "value": "The answer is June 1, 1937" } ], "clip_score": 23.546875, "loss": 2.0983026027679443 }, { "id": "identity_36111", "image": "IconQA/images/78279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) certain\n(B) unlikely\n(C) probable\n(D) impossible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.703125, "loss": 4.348911285400391 }, { "id": "identity_64133", "image": "TQA/images/question_images/human_system_digestive_3706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organ is not part of the digestive system?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 35.75, "loss": 2.990264415740967 }, { "id": "identity_27559", "image": "A-OKVQA/images/325532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How are the objects in the sky powered?\nChoices:\n(A) gas\n(B) wind\n(C) sun\n(D) electricity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.1171875, "loss": 4.281625270843506 }, { "id": "identity_26164", "image": "AI2D/images/118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What term best describes B in the diagram?\nChoices:\n(A) Autumn\n(B) Winter\n(C) Spring\n(D) Summer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.828125, "loss": 4.350788593292236 }, { "id": "identity_161620", "image": "GeoQA+/images/748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the measure of angle 1 is 34 degrees, what is the measure of angle 3, the angle adjacent to angle 1 on the straight edge?" }, { "from": "gpt", "value": "The answer is 146 degrees" } ], "clip_score": 31.828125, "loss": 1.9806548357009888 }, { "id": "identity_215452", "image": "A-OKVQA/images/118051.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any live plants on the desk?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.390625, "loss": 3.2799763679504395 }, { "id": "identity_145364", "image": "FigureQA/images/81730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value on the y-axis label for Deep Pink?" }, { "from": "gpt", "value": "The answer is 78" } ], "clip_score": 27.453125, "loss": 2.7726001739501953 }, { "id": "identity_13108", "image": "IconQA/images/77190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are on the frame?\nChoices:\n(A) 4\n(B) 1\n(C) 3\n(D) 2\n(E) 8" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.8125, "loss": 3.4721343517303467 }, { "id": "identity_12624", "image": "FigureQA/images/66985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Royal Blue the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.296875, "loss": 3.8023059368133545 }, { "id": "identity_9517", "image": "FigureQA/images/64624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Gold have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.625, "loss": 4.185274124145508 }, { "id": "identity_264261", "image": "UniGeo/images/2900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, C, and D are all on circle O, with angle B measuring 55°. If AO is parallel to DC, what is the degree measure of angle AOD?\nChoices:\n(A) 70°\n(B) 75°\n(C) 80°\n(D) 85°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.3125, "loss": 2.332000255584717 }, { "id": "identity_141626", "image": "MapQA/images/map_1636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any state with a percentage of films less than 10.2%?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 35.46875, "loss": 3.027766704559326 }, { "id": "identity_190159", "image": "PMC-VQA/images/PMC8325865_Fig1_24387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.78125, "loss": 3.2619547843933105 }, { "id": "identity_53687", "image": "DVQA/images/bar_train_00163246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the algorithm site have a higher accuracy on the dataset tray compared to the dataset reach?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 35.8125, "loss": 2.637521982192993 }, { "id": "identity_104589", "image": "Super-CLEVR/images/superCLEVR_new_001290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the purple object?" }, { "from": "gpt", "value": "The answer is Minivan" } ], "clip_score": 30.125, "loss": 3.8647334575653076 }, { "id": "identity_241009", "image": "Geometry3K/images/1446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of angle EDC" }, { "from": "gpt", "value": "The answer is 15°" } ], "clip_score": 34.5, "loss": 2.3026082515716553 }, { "id": "identity_272498", "image": "UniGeo/images/2514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, circle O is the circumcircle of triangle ACD, AB is the diameter of circle O, and ∠BAD = 60°. What is the degree measure of ∠C?\nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.59375, "loss": 2.5162627696990967 }, { "id": "identity_193997", "image": "PMC-VQA/images/PMC4619722_pone.0140537.g005_437036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many axial slices of the brain are displayed in the image?" }, { "from": "gpt", "value": "The answer is Nine axial slices" } ], "clip_score": 30.578125, "loss": 2.7375869750976562 }, { "id": "identity_135673", "image": "MapQA/images/map_859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of New York?" }, { "from": "gpt", "value": "The answer is 61.7%-61.8%" } ], "clip_score": 19.953125, "loss": 2.1658756732940674 }, { "id": "identity_216661", "image": "Super-CLEVR/images/superCLEVR_new_001505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that has a pattern or multiple colors?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.15625, "loss": 2.9644298553466797 }, { "id": "identity_13124", "image": "PlotQA/images/154712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label or title of the Y-axis ?" }, { "from": "gpt", "value": "The answer is Amount spent (% of revenue)" } ], "clip_score": 21.4375, "loss": 3.008974552154541 }, { "id": "identity_265541", "image": "DVQA/images/bar_train_00034611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many groups of bars is there at least one bar with a value less than 6?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 18.765625, "loss": 3.80607533454895 }, { "id": "identity_59320", "image": "FigureQA/images/63221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of Cyan at x-axis value 60?" }, { "from": "gpt", "value": "The answer is Approximately 35" } ], "clip_score": 26.375, "loss": 2.537560224533081 }, { "id": "identity_245543", "image": "DVQA/images/bar_train_00108744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the 'pace' category, and how does it compare to the total number of people who prefer the 'pistol' category?" }, { "from": "gpt", "value": "The answer is 11 people prefer 'pace', 15 people prefer 'pistol'" } ], "clip_score": 29.953125, "loss": 1.8210138082504272 }, { "id": "identity_193866", "image": "TextVQA/images/765bb8ab1e6a0eff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the word \"ECOSAN\" appear on the board?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.78125, "loss": 1.9707926511764526 }, { "id": "identity_103735", "image": "TextVQA/images/9aae5672d3f4f1c7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many celestial bodies are depicted within the illustration?" }, { "from": "gpt", "value": "The answer is Nine" } ], "clip_score": 16.984375, "loss": 3.1772167682647705 }, { "id": "identity_318223", "image": "TabMWP/images/8639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Jen has $1,247. Can she afford a guitar and a cornet?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.78125, "loss": 3.489943027496338 }, { "id": "identity_117892", "image": "Super-CLEVR/images/superCLEVR_new_001375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the right of the large cyan bicycle and left of the small green metallic fighter?" }, { "from": "gpt", "value": "The answer is Purple and silver" } ], "clip_score": 31.203125, "loss": 3.9360570907592773 }, { "id": "identity_164507", "image": "GeoQA+/images/5324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠ABD and ∠ACD are angle bisectors meeting at point P, and ∠A = 48°, what is the measure of ∠BPC?" }, { "from": "gpt", "value": "The answer is 199°" } ], "clip_score": 33.34375, "loss": 2.273458242416382 }, { "id": "identity_23573", "image": "IconQA/images/4808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a moon. Which picture is third?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.234375, "loss": 4.033442497253418 }, { "id": "identity_248030", "image": "VizWiz/images/VizWiz_train_00013240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capacity of the food chopper in milliliters?" }, { "from": "gpt", "value": "The answer is 355.5 milliliters" } ], "clip_score": 20.03125, "loss": 2.2088003158569336 }, { "id": "identity_332887", "image": "DocVQA/images/qqjw0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the organization mentioned in the title?" }, { "from": "gpt", "value": "The answer is swanson center for nutrition, inc." } ], "clip_score": 19.578125, "loss": 1.9115296602249146 }, { "id": "identity_317945", "image": "DVQA/images/bar_train_00021828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the darkorange color represent?" }, { "from": "gpt", "value": "The answer is affair" } ], "clip_score": 20.78125, "loss": 5.386349678039551 }, { "id": "identity_320454", "image": "A-OKVQA/images/577125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the skate technique being performed?\nChoices:\n(A) 180\n(B) tail slide\n(C) board slide\n(D) manual" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.328125, "loss": 4.025282859802246 }, { "id": "identity_41422", "image": "FigureQA/images/62166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the graph?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 26.703125, "loss": 3.5144431591033936 }, { "id": "identity_235888", "image": "VQA2.0/images/24076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of paint can be identified in the paintings, and what are they?" }, { "from": "gpt", "value": "The answer is Six colors: red, blue, white, yellow, black, brown" } ], "clip_score": 25.546875, "loss": 2.122445821762085 }, { "id": "identity_193229", "image": "PMC-VQA/images/PMC8122453_sensors-21-02934-f012_440830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color used to indicate the protected area in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.578125, "loss": 3.2753746509552 }, { "id": "identity_39823", "image": "FigureQA/images/73099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value of Medium Aqua?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 23.625, "loss": 2.428126811981201 }, { "id": "identity_253534", "image": "A-OKVQA/images/198437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of equestrian event is the woman in yellow participating in, and what are the key indicators in the image that support this conclusion?" }, { "from": "gpt", "value": "The answer is Dressage competition" } ], "clip_score": 30.40625, "loss": 3.613610029220581 }, { "id": "identity_258532", "image": "TQA/images/abc_question_images/ocean_zones_18126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter represents the zone where photosynthesis is most likely to occur, and what is the relationship between this zone and the pelagic zone?" }, { "from": "gpt", "value": "The answer is L" } ], "clip_score": 33.78125, "loss": 4.153082847595215 }, { "id": "identity_2398", "image": "UniGeo/images/2836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,C,D是圆上两点,∠AOC=50°,则∠D等于()\nChoices:\n(A) 25°\n(B) 30°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.046875, "loss": 2.6628384590148926 }, { "id": "identity_59582", "image": "FigureQA/images/91965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many Lime Green dots are there above the 60 mark on the y-axis?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.640625, "loss": 2.3956949710845947 }, { "id": "identity_277851", "image": "A-OKVQA/images/33871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would this person need to go if they wanted to wash their hands?\nChoices:\n(A) here\n(B) gas station\n(C) bathroom\n(D) kitchen" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.578125, "loss": 3.968949794769287 }, { "id": "identity_20295", "image": "TextVQA/images/629023a813c058d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is skyy back there?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.65625, "loss": 3.498302936553955 }, { "id": "identity_19168", "image": "VQA-RAD/images/synpic42157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a cystic or solid mass?" }, { "from": "gpt", "value": "The answer is Cystic" } ], "clip_score": 26.9375, "loss": 1.7703702449798584 }, { "id": "identity_273025", "image": "DocVQA/images/xkjp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the date mentioned in the document?" }, { "from": "gpt", "value": "The answer is august 8, 1976" } ], "clip_score": 17.171875, "loss": 1.7619065046310425 }, { "id": "identity_261316", "image": "ScienceQA/images/1270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary rhetorical appeal used in this advertisement?\nChoices:\n(A) logos (reason)\n(B) ethos (character)\n(C) pathos (emotion)" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.0078125, "loss": 4.660575866699219 }, { "id": "identity_42478", "image": "DVQA/images/bar_train_00015454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest accuracy value for the 'door' dataset?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 28.90625, "loss": 3.073399305343628 }, { "id": "identity_113278", "image": "VQA-AS/images/10220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the mushrooms near the pond?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 28.140625, "loss": 3.059083938598633 }, { "id": "identity_222127", "image": "PMC-VQA/images/PMC5586526_f1_212261.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of midline shift in the MRI scan?" }, { "from": "gpt", "value": "The answer is No midline shift" } ], "clip_score": 30.96875, "loss": 3.024725914001465 }, { "id": "identity_264589", "image": "GeoQA+/images/16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, quadrilateral ABCD is inscribed in circle O. Given that angle BCE is 70°, what is the degree measure of angle A?\nChoices:\n(A) 110°\n(B) 70°\n(C) 55°\n(D) 35°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.0, "loss": 2.797224521636963 }, { "id": "identity_37263", "image": "VQA-AS/images/4422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kids are seated at the table?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.546875, "loss": 2.427948236465454 }, { "id": "identity_5560", "image": "Super-CLEVR/images/superCLEVR_new_000716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the large thing behind the red motorbike have the same shape as the brown metallic thing?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 17.09375, "loss": 5.250420093536377 }, { "id": "identity_204720", "image": "A-OKVQA/images/569050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What flag is flying on the building behind the bus?" }, { "from": "gpt", "value": "The answer is Russia" } ], "clip_score": 26.921875, "loss": 3.8628921508789062 }, { "id": "identity_299446", "image": "TabMWP/images/27870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the table provided, is there a shortage or a surplus at a price of $910?\nChoices:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.75, "loss": 3.614645481109619 }, { "id": "identity_54479", "image": "FigureQA/images/61465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color line has the highest peak value?" }, { "from": "gpt", "value": "The answer is Aqua" } ], "clip_score": 27.328125, "loss": 2.7955970764160156 }, { "id": "identity_210913", "image": "A-OKVQA/images/565500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any pedestrian crossing the street in the image?" }, { "from": "gpt", "value": "The answer is Yes, one person" } ], "clip_score": 25.859375, "loss": 3.9784278869628906 }, { "id": "identity_87937", "image": "GEOS/images/044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AED?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 29.0, "loss": 2.127906560897827 }, { "id": "identity_76446", "image": "TQA/images/abc_question_images/human_system_eye_16125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"P\" represent, which is the part responsible for draining excess fluid from the eye?" }, { "from": "gpt", "value": "The answer is Trabecular Meshwork (Label P)" } ], "clip_score": 28.421875, "loss": 2.205712080001831 }, { "id": "identity_62723", "image": "ScienceQA/images/18118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What item is common in both Sandra's and Zoe's list of wants?" }, { "from": "gpt", "value": "The answer is water" } ], "clip_score": 33.3125, "loss": 3.2699942588806152 }, { "id": "identity_87393", "image": "GeoQA+/images/674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 2?" }, { "from": "gpt", "value": "The answer is 105°" } ], "clip_score": 32.28125, "loss": 2.161525249481201 }, { "id": "identity_80318", "image": "GeoQA+/images/4350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the right triangle ABC, if angle C is 90 degrees, AC is 8, and BC is 6, what is the length of AB?" }, { "from": "gpt", "value": "The answer is AB = 10" } ], "clip_score": 30.140625, "loss": 1.5549901723861694 }, { "id": "identity_163230", "image": "UniGeo/images/1557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with DE parallel to BC, if AE is 4 units and EC is 2 units, what is the ratio of AD to DB?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 34.5, "loss": 1.0343894958496094 }, { "id": "identity_215725", "image": "A-OKVQA/images/31471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is displayed on the screen in the background?" }, { "from": "gpt", "value": "The answer is 2013" } ], "clip_score": 15.8828125, "loss": 1.3762582540512085 }, { "id": "identity_166512", "image": "TabMWP/images/11381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of students surveyed?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 22.390625, "loss": 2.3031907081604004 }, { "id": "identity_114120", "image": "TextVQA/images/629b9642469d7ec4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding the banner on the right side of the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.65625, "loss": 4.270166397094727 }, { "id": "identity_269872", "image": "ScienceQA/images/12472.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the skunk considered a consumer in this food chain?\nChoices:\n(A) It produces its own food.\n(B) It consumes another organism." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 39.625, "loss": 3.6602256298065186 }, { "id": "identity_25275", "image": "A-OKVQA/images/315314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which person will try this cake first?\nChoices:\n(A) groomsman\n(B) bride\n(C) groom\n(D) both" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.09375, "loss": 3.9361395835876465 }, { "id": "identity_301813", "image": "GeoQA+/images/601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线a∥b∥c,直线m、n与这些平行线分别交于点A、B、C和点D、E、F。若AB=3,BC=5,DF=12,则DE的值为()\n\n选项:\n(A) \\frac{9}{4}\n(B) 4\n(C) \\frac{9}{2}\n(D) \\frac{15}{2}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.609375, "loss": 2.636197566986084 }, { "id": "identity_187210", "image": "PMC-VQA/images/PMC7939567_F0002_386115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain slice in the image?" }, { "from": "gpt", "value": "The answer is Horizontal" } ], "clip_score": 30.21875, "loss": 3.6379382610321045 }, { "id": "identity_200458", "image": "TextVQA/images/58928e8bb31269d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What footwear is the person standing on the stage wearing?" }, { "from": "gpt", "value": "The answer is Black boots" } ], "clip_score": 23.984375, "loss": 2.484736680984497 }, { "id": "identity_209225", "image": "A-OKVQA/images/1053.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of road marking is directly in front of the car?" }, { "from": "gpt", "value": "The answer is A box junction" } ], "clip_score": 25.703125, "loss": 3.7779927253723145 }, { "id": "identity_9830", "image": "TabMWP/images/3541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tiana has $10.00. Does she have enough to buy a pair of shorts and a purple T-shirt?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.90625, "loss": 3.574166774749756 }, { "id": "identity_310216", "image": "TabMWP/images/33601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: An architecture student measured the heights of all the buildings downtown. How many buildings are shorter than 34 meters?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 30.46875, "loss": 2.307788610458374 }, { "id": "identity_151414", "image": "ScienceQA/images/852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the body of water to the right of the highlighted continent?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 22.640625, "loss": 2.811650514602661 }, { "id": "identity_252159", "image": "TextVQA/images/9d7c16c396280763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the company on the side of the truck, and what is the speed limit on the street?" }, { "from": "gpt", "value": "The answer is AVIS, 25" } ], "clip_score": 25.5, "loss": 3.0174694061279297 }, { "id": "identity_63451", "image": "AI2D/images/1164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"Mitochondrion\" refer to in the cell diagram?" }, { "from": "gpt", "value": "The answer is Organelle for respiration and energy production" } ], "clip_score": 31.203125, "loss": 2.871152877807617 }, { "id": "identity_324260", "image": "TextVQA/images/22db0f943c236f8e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is shown on this ramp?" }, { "from": "gpt", "value": "The answer is byerly" } ], "clip_score": 18.65625, "loss": 2.966722249984741 }, { "id": "identity_231843", "image": "IconQA/images/11545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the positions of all the rockets in the sequence?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 20.59375, "loss": 2.193129539489746 }, { "id": "identity_12585", "image": "PMC-VQA/images/PMC6908538_Fig1_82803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the image shown? \nChoices:\n(A) To highlight the difference between the treatment zones. \n(B) To showcase the brain's anatomy. \n(C) To show the effects of radiation therapy on the human brain. \n(D) To display a cross-section of the human brain. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.125, "loss": 4.088624000549316 }, { "id": "identity_321671", "image": "GeoQA+/images/1218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图△ABC中,AB、BC垂直平分线相交于点O,∠BAC=70°,则∠BOC的度数是多少?\n选项:\n(A) 140°\n(B) 130°\n(C) 125°\n(D) 110°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.78125, "loss": 2.4637675285339355 }, { "id": "identity_219117", "image": "A-OKVQA/images/345702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the elephant's decorative headpiece?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.328125, "loss": 3.5338943004608154 }, { "id": "identity_108289", "image": "A-OKVQA/images/230522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.609375, "loss": 2.1955864429473877 }, { "id": "identity_152028", "image": "ScienceQA/images/7045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the peninsula located in the southeastern part of the highlighted country?" }, { "from": "gpt", "value": "The answer is Yucatan Peninsula" } ], "clip_score": 25.015625, "loss": 1.8181012868881226 }, { "id": "identity_186008", "image": "A-OKVQA/images/289995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red umbrellas are visible in the image?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 19.234375, "loss": 2.1506903171539307 }, { "id": "identity_278073", "image": "FigureQA/images/37016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 3.4312210083007812 }, { "id": "identity_52844", "image": "FigureQA/images/25158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Peru section represent more than 50% of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.859375, "loss": 3.3000946044921875 }, { "id": "identity_85762", "image": "Geometry3K/images/1722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 4?" }, { "from": "gpt", "value": "The answer is 94 degrees" } ], "clip_score": 34.40625, "loss": 2.764244556427002 }, { "id": "identity_133043", "image": "A-OKVQA/images/79645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the train?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.5625, "loss": 3.3180320262908936 }, { "id": "identity_270616", "image": "TQA/images/abc_question_images/human_system_nervous_16167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the letter that corresponds to the motor cortex:\n(A) R\n(B) N\n(C) H\n(D) V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.5625, "loss": 4.395272254943848 }, { "id": "identity_240047", "image": "GeoQA+/images/4641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,BD是⊙O的直径,∠CBD=30°,求∠A和∠B的和是多少度?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 26.765625, "loss": 1.6140252351760864 }, { "id": "identity_336789", "image": "PMC-VQA/images/PMC5965189_f1-cpcem-01-268_318156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is causing the casting shadows from the endometrium in this image?\nChoices:\n(A) Air in the uterus \n(B) Normal tissue \n(C) Irregular echogenic endometrial debris \n(D) Cancer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5, "loss": 3.9998526573181152 }, { "id": "identity_114322", "image": "TextVQA/images/7e95c7dabc52e3b2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the superhero's costume, excluding the shield?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.0, "loss": 3.460278272628784 }, { "id": "identity_187119", "image": "PMC-VQA/images/PMC5839382_F4_284295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is shown in the image?" }, { "from": "gpt", "value": "The answer is The cervical and thoracic spine" } ], "clip_score": 30.90625, "loss": 2.840219259262085 }, { "id": "identity_96982", "image": "TabMWP/images/36716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teams have a size ending in 4?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.15625, "loss": 2.291788339614868 }, { "id": "identity_72439", "image": "AI2D/images/4193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the process that forms metamorphic rock?" }, { "from": "gpt", "value": "The answer is Deformation and metamorphism" } ], "clip_score": 24.5625, "loss": 2.0821189880371094 }, { "id": "identity_153905", "image": "ScienceQA/images/3117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two labels are adjacent to the Pond?" }, { "from": "gpt", "value": "The answer is Park and Grocery Store" } ], "clip_score": 36.71875, "loss": 1.8166334629058838 }, { "id": "identity_290124", "image": "PMC-VQA/images/PMC6221646_F1_392942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the window used to capture the given image from the following options: (A) Abdominal window, (B) Chest window, (C) Bone window, or (D) Mediastinal window." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.75, "loss": 4.359009742736816 }, { "id": "identity_118069", "image": "A-OKVQA/images/356147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of landscape is depicted in the image?" }, { "from": "gpt", "value": "The answer is It is a riverside landscape" } ], "clip_score": 20.9375, "loss": 3.6369616985321045 }, { "id": "identity_225587", "image": "A-OKVQA/images/405848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual elements in the image that indicate the motorcycle is part of a parade?" }, { "from": "gpt", "value": "The answer is Crowd, American flag, police markings" } ], "clip_score": 34.4375, "loss": 3.331434488296509 }, { "id": "identity_298937", "image": "VizWiz/images/VizWiz_train_00019307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please identify that?" }, { "from": "gpt", "value": "The answer is bottle" } ], "clip_score": 19.84375, "loss": 2.7946317195892334 }, { "id": "identity_44790", "image": "FigureQA/images/85896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Maroon have a higher value than Medium Purple at x-axis value 50?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.46875, "loss": 3.042154312133789 }, { "id": "identity_111667", "image": "VizWiz/images/VizWiz_train_00012211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any eggs that are not colored?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.03125, "loss": 3.335294246673584 }, { "id": "identity_72440", "image": "AI2D/images/4193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the starting point for the formation of igneous rock according to the diagram?" }, { "from": "gpt", "value": "The answer is Crystallization of magma" } ], "clip_score": 30.09375, "loss": 1.8134268522262573 }, { "id": "identity_65548", "image": "AI2D/images/1222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part of the plant cell is responsible for protein synthesis?" }, { "from": "gpt", "value": "The answer is Ribosomes" } ], "clip_score": 31.15625, "loss": 1.699467658996582 }, { "id": "identity_166881", "image": "TabMWP/images/15146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median size of the swim teams?" }, { "from": "gpt", "value": "The answer is 37.5" } ], "clip_score": 34.90625, "loss": 2.26501202583313 }, { "id": "identity_209648", "image": "A-OKVQA/images/253528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bag carried by the person walking on the sidewalk?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.375, "loss": 3.183570146560669 }, { "id": "identity_9727", "image": "TextVQA/images/6ad84631a4e64421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what year is on the bottom?" }, { "from": "gpt", "value": "The answer is 2007" } ], "clip_score": 12.7734375, "loss": 1.5007160902023315 }, { "id": "identity_71674", "image": "ScienceQA/images/6475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the blade of the shovel pointed or flat?" }, { "from": "gpt", "value": "The answer is Pointed" } ], "clip_score": 29.84375, "loss": 2.7441165447235107 }, { "id": "identity_6385", "image": "PMC-VQA/images/PMC7419209_Fig3_232609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the size of the vaginal mass shown in the image? \nChoices:\n(A) 50 cm \n(B) 120 cm \n(C) 652 cm \n(D) 833 cm " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.65625, "loss": 3.6570515632629395 }, { "id": "identity_268707", "image": "GeoQA+/images/1208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and C lie on circle O, with angle ABC measuring 31°. If a tangent is drawn from point C to circle O, intersecting the extension of OA at point D, what is the measure of angle D?\nChoices:\n(A) 32°\n(B) 31°\n(C) 28°\n(D) 27°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.15625, "loss": 2.3640804290771484 }, { "id": "identity_135599", "image": "ChartQA/images/multi_col_1312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage point difference between accessing music via stream ripping for 16 to 24-year-olds compared to the total population?" }, { "from": "gpt", "value": "The answer is 11%" } ], "clip_score": 30.578125, "loss": 2.0718722343444824 }, { "id": "identity_153448", "image": "TQA/images/abc_question_images/ocean_zones_18126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'L' represent in this image?" }, { "from": "gpt", "value": "The answer is Epipelagic zone" } ], "clip_score": 32.8125, "loss": 2.737269163131714 }, { "id": "identity_37781", "image": "TabMWP/images/16127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The members of the track team compared how many miles they ran last week. What is the median of the numbers?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.25, "loss": 2.753570795059204 }, { "id": "identity_134004", "image": "TextVQA/images/53135d87a1768a41.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of product is likely contained in this box?" }, { "from": "gpt", "value": "The answer is Men's fashion or luxury item" } ], "clip_score": 27.59375, "loss": 2.8786327838897705 }, { "id": "identity_257418", "image": "A-OKVQA/images/402823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key elements that indicate this horse is practicing steeplechase, and how do they differentiate it from other activities such as escape, posing, or bucking?" }, { "from": "gpt", "value": "The answer is The structured obstacle course and rider's gear indicate steeplechase" } ], "clip_score": 25.59375, "loss": 2.8153247833251953 }, { "id": "identity_204774", "image": "TextVQA/images/0d4f6be234078add.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the brewery mentioned on the label?" }, { "from": "gpt", "value": "The answer is Zornedinger Strasse 1, Aying, BRD, Germany" } ], "clip_score": 23.078125, "loss": 2.971714973449707 }, { "id": "identity_299894", "image": "TabMWP/images/5732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likelihood of randomly selecting a bow tie from Jennifer's designs at the boutique store that is made of cotton and shaped like a batwing? Please simplify any fractions." }, { "from": "gpt", "value": "The answer is 4/15" } ], "clip_score": 27.78125, "loss": 2.1519148349761963 }, { "id": "identity_77391", "image": "ScienceQA/images/11592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which story is set in a medieval setting?" }, { "from": "gpt", "value": "The answer is A Connecticut Yankee in King Arthur's Court" } ], "clip_score": 30.4375, "loss": 1.2420158386230469 }, { "id": "identity_255134", "image": "PMC-VQA/images/PMC7482117_f2_251602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences in the imaging techniques used in A and B, and how do these differences affect the type of information that can be obtained from each image?" }, { "from": "gpt", "value": "The answer is MRI provides detailed images of soft tissues and brain function" } ], "clip_score": 34.15625, "loss": 3.1471312046051025 }, { "id": "identity_279298", "image": "FigureQA/images/77969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Lawn Green or Medium Turquoise?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.640625, "loss": 3.729872941970825 }, { "id": "identity_239623", "image": "PMC-VQA/images/PMC6243351_fig5_398320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the non-enhanced CT scan of the head, identify the specific signs that indicate a right middle cerebral artery territory infarct and explain why the other options (A, B, and D) are less likely" }, { "from": "gpt", "value": "The answer is Right MCA territory infarct indicated by hypodensity and loss of differentiation" } ], "clip_score": 32.75, "loss": 2.6021814346313477 }, { "id": "identity_37584", "image": "TabMWP/images/33213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During a lesson on graphing data, students in Mrs. Katy's math class guessed a number between 1 and 5 and recorded the results. How many students guessed 4?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 31.03125, "loss": 2.5182697772979736 }, { "id": "identity_255227", "image": "A-OKVQA/images/535278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the items to the left of the stuffed frog and what is the item below the stuffed frog called?" }, { "from": "gpt", "value": "The answer is A cup and a small container, purse" } ], "clip_score": 15.1484375, "loss": 4.04719352722168 }, { "id": "identity_196063", "image": "Super-CLEVR/images/superCLEVR_new_001877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle to the left of the blue bus?" }, { "from": "gpt", "value": "The answer is Purple and yellow" } ], "clip_score": 33.5, "loss": 3.5241198539733887 }, { "id": "identity_127428", "image": "Super-CLEVR/images/superCLEVR_new_000729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the green fighter relative to the brown double bus?" }, { "from": "gpt", "value": "The answer is To the left and slightly behind" } ], "clip_score": 23.015625, "loss": 3.2249560356140137 }, { "id": "identity_252924", "image": "IconQA/images/50221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the seventh picture in the sequence if the first picture is a plane?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 25.5625, "loss": 2.8782341480255127 }, { "id": "identity_64114", "image": "TQA/images/abc_question_images/faults_11738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plate boundary is represented by label J?" }, { "from": "gpt", "value": "The answer is Convergent Plate Boundary" } ], "clip_score": 27.078125, "loss": 1.3150209188461304 }, { "id": "identity_20985", "image": "ScienceQA/images/405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can Percy and Cole trade to each get what they want?\nChoices:\n(A) Percy can trade his tomatoes for Cole's broccoli.\n(B) Cole can trade his broccoli for Percy's oranges.\n(C) Cole can trade his almonds for Percy's tomatoes.\n(D) Percy can trade his tomatoes for Cole's carrots." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.90625, "loss": 3.101572036743164 }, { "id": "identity_168570", "image": "TabMWP/images/32395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a price of $220, by what percentage does quantity supplied exceed quantity demanded?" }, { "from": "gpt", "value": "The answer is Quantity supplied is 17.53% less than demanded" } ], "clip_score": 33.125, "loss": 2.8265109062194824 }, { "id": "identity_17245", "image": "PMC-VQA/images/PMC6701203_F3_25752.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the stereomicroscope image? \nChoices:\n(A) root apical meristem (ram) \n(B) vascular bundles \n(C) stomata \n(D) shoot apical meristem (sam) " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.171875, "loss": 3.943589687347412 }, { "id": "identity_334896", "image": "PMC-VQA/images/PMC7914843_molecules-26-00847-f002_377457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is indicated by the blue triangle?\nChoices:\n(A) Muscle tissue\n(B) Nervous tissue\n(C) Bone tissue\n(D) Epithelial tissue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.6875, "loss": 4.862650394439697 }, { "id": "identity_261546", "image": "FigureQA/images/53541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon the smallest option available?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.703125, "loss": 3.808171033859253 }, { "id": "identity_291731", "image": "VQA2.0/images/165347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the color of the bus?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 25.71875, "loss": 3.4891586303710938 }, { "id": "identity_151471", "image": "TQA/images/question_images/human_system_reproductory_7015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the part labeled as 11?" }, { "from": "gpt", "value": "The answer is Delivery of sperm during ejaculation" } ], "clip_score": 25.171875, "loss": 2.407975196838379 }, { "id": "identity_129603", "image": "VQA-AS/images/17181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is depicted in the paintings on the wall?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 22.078125, "loss": 3.314880132675171 }, { "id": "identity_200258", "image": "PMC-VQA/images/PMC4356427_f1-ol-09-04-1667_366612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date displayed on the image?" }, { "from": "gpt", "value": "The answer is 20080309" } ], "clip_score": 18.734375, "loss": 1.240132451057434 }, { "id": "identity_336090", "image": "GeoQA+/images/481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在一个平行四边形中,对角线相交于一点,E是其中一条边的中点,连接OE.若OE=3,则CD的长为()\nChoices:\n(A) 3\n(B) 5\n(C) 6\n(D) 9" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.125, "loss": 2.492004871368408 }, { "id": "identity_317071", "image": "VQA2.0/images/406205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 17.359375, "loss": 3.324414014816284 }, { "id": "identity_145761", "image": "DVQA/images/bar_train_00081298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the \"drive\" category have a preference of 5 or more people?" }, { "from": "gpt", "value": "The answer is 2 objects" } ], "clip_score": 25.359375, "loss": 2.729599714279175 }, { "id": "identity_221967", "image": "TextVQA/images/3877144880f54acd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the Lewyt Corporation as provided in the advertisement?" }, { "from": "gpt", "value": "The answer is 62 Broadway, Brooklyn, 11, New York" } ], "clip_score": 21.4375, "loss": 2.3663928508758545 }, { "id": "identity_270035", "image": "VQA2.0/images/521867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the curtains?" }, { "from": "gpt", "value": "The answer is pink" } ], "clip_score": 24.046875, "loss": 3.5391101837158203 }, { "id": "identity_186085", "image": "PMC-VQA/images/PMC8977489_F5_242901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sets of images are presented in this figure?" }, { "from": "gpt", "value": "The answer is Two sets" } ], "clip_score": 19.734375, "loss": 4.266287326812744 }, { "id": "identity_304641", "image": "VQA2.0/images/201659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mugs are present on this surface?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 13.703125, "loss": 2.172957181930542 }, { "id": "identity_153664", "image": "ScienceQA/images/7489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many planets in the solar system are primarily composed of rock?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.46875, "loss": 2.2442989349365234 }, { "id": "identity_126239", "image": "TextVQA/images/01209963ab93abd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many water bottles can be seen on the podium?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 12.0390625, "loss": 4.54799222946167 }, { "id": "identity_279868", "image": "FigureQA/images/79301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray the most rough?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.296875, "loss": 4.34717321395874 }, { "id": "identity_245793", "image": "A-OKVQA/images/155167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the specific instructions given by the street signs for drivers at this intersection?" }, { "from": "gpt", "value": "The answer is Travel right, no stopping anytime" } ], "clip_score": 25.96875, "loss": 3.988433837890625 }, { "id": "identity_200636", "image": "VizWiz/images/VizWiz_train_00007304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this medication for prescription use only?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.625, "loss": 2.917874574661255 }, { "id": "identity_6639", "image": "GeoQA+/images/5225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,∠BAC=90°,AB=6,AC=8,P是斜边BC上一动点,PE⊥AB于E,PF⊥AC于F,EF与AP相交于点O,则OF的最小值为()\nChoices:\n(A) 4.8\n(B) 1.2\n(C) 3.6\n(D) 2.4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.375, "loss": 2.2634177207946777 }, { "id": "identity_175846", "image": "IconQA/images/89945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper clips are not connected to any other paper clip?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.5, "loss": 2.5138142108917236 }, { "id": "identity_86952", "image": "Geometry3K/images/1137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 36.59375, "loss": 2.868650197982788 }, { "id": "identity_317060", "image": "CLEVR-Math/images/CLEVR_train_012300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove one of these. How many of these are left?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 19.953125, "loss": 2.5753586292266846 }, { "id": "identity_149257", "image": "DocVQA/images/frld0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of convening thought leaders according to the Neurontin 1998 Tactical Plan?" }, { "from": "gpt", "value": "The answer is To discuss treatment options and decision criteria" } ], "clip_score": 25.640625, "loss": 3.444314479827881 }, { "id": "identity_221985", "image": "VQA2.0/images/327895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement for on the back of the red bus?" }, { "from": "gpt", "value": "The answer is FergoGlobin" } ], "clip_score": 32.375, "loss": 3.119211435317993 }, { "id": "identity_174322", "image": "TabMWP/images/10107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of kilometers walked by Clara over the past 5 days?" }, { "from": "gpt", "value": "The answer is 8 kilometers" } ], "clip_score": 27.640625, "loss": 2.7545127868652344 }, { "id": "identity_160809", "image": "GeoQA+/images/4237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠D in degrees if ∠AOB is 160° and O is the center of the circle?" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 35.59375, "loss": 1.7995765209197998 }, { "id": "identity_171172", "image": "IconQA/images/53298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you had five nails of the same size, what would be their total length to the nearest inch?" }, { "from": "gpt", "value": "The answer is 13 inches" } ], "clip_score": 32.28125, "loss": 2.2250592708587646 }, { "id": "identity_88198", "image": "GeoQA+/images/5375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that pentagon ABCDE is symmetrical about line l, what is the measure of ∠BAE?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 35.9375, "loss": 1.9787921905517578 }, { "id": "identity_250397", "image": "Geometry3K/images/1067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the area of the triangle using the given side and angles. Round your answer to the nearest tenth" }, { "from": "gpt", "value": "The answer is 141.6" } ], "clip_score": 33.4375, "loss": 2.320067882537842 }, { "id": "identity_166029", "image": "IconQA/images/40599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the letter 'G' is rotated back to its original position, what transformation would be applied?" }, { "from": "gpt", "value": "The answer is The letter 'G' would be rotated counterclockwise by 180 degrees" } ], "clip_score": 27.5, "loss": 1.613508701324463 }, { "id": "identity_294196", "image": "VQA2.0/images/356938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For what purpose is the clock utilized?" }, { "from": "gpt", "value": "The answer is time" } ], "clip_score": 19.9375, "loss": 4.31773567199707 }, { "id": "identity_289363", "image": "PMC-VQA/images/PMC4357143_Fig2_366897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the computed tomogram image, which lobe shows bullous changes?\nChoices:\n(A) Left lower lobe\n(B) Right upper lobe\n(C) Right lower lobe\n(D) Left upper lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.015625, "loss": 4.188498020172119 }, { "id": "identity_55632", "image": "FigureQA/images/96928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Khaki have a consistent trend throughout the graph?" }, { "from": "gpt", "value": "The answer is No, it fluctuates" } ], "clip_score": 30.40625, "loss": 2.751972198486328 }, { "id": "identity_183394", "image": "Super-CLEVR/images/superCLEVR_new_000842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the scooter similar to any other object in the image?" }, { "from": "gpt", "value": "The answer is Yes, it is similar to the car" } ], "clip_score": 26.96875, "loss": 2.5309340953826904 }, { "id": "identity_65353", "image": "AI2D/images/4326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the diagram, which phase of matter can change its shape but not its volume?" }, { "from": "gpt", "value": "The answer is Liquid" } ], "clip_score": 35.96875, "loss": 1.9918628931045532 }, { "id": "identity_23677", "image": "TabMWP/images/8473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A pizza chef recalled how many pizzas he had made during the past 5 days. What is the mean of the numbers?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 33.15625, "loss": 2.7514936923980713 }, { "id": "identity_108737", "image": "VizWiz/images/VizWiz_train_00007113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pen have a cap?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.5625, "loss": 3.5855350494384766 }, { "id": "identity_254395", "image": "PMC-VQA/images/PMC6627094_ijms-20-03047-f007_7646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific technique was used to image HER2 expression in the study, and what is the significance of the highlighted regions in the image?" }, { "from": "gpt", "value": "The answer is PET scan, high HER2 expression regions" } ], "clip_score": 33.25, "loss": 4.248384952545166 }, { "id": "identity_125193", "image": "Super-CLEVR/images/superCLEVR_new_001340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have wheels in the image?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 21.296875, "loss": 2.5582594871520996 }, { "id": "identity_57578", "image": "DVQA/images/bar_train_00166070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many datasets have an accuracy of 5 or higher for the 'bar' algorithm?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 33.71875, "loss": 2.316777467727661 }, { "id": "identity_228927", "image": "VQA2.0/images/467646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas are visible in the image?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 28.0, "loss": 2.379547119140625 }, { "id": "identity_109515", "image": "VQA2.0/images/436046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella the person is holding?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 12.0859375, "loss": 3.4004197120666504 }, { "id": "identity_93297", "image": "TabMWP/images/13081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the table, can we conclude that the function has a constant rate of change?" }, { "from": "gpt", "value": "The answer is No, the rate of change is not constant" } ], "clip_score": 24.84375, "loss": 2.22348690032959 }, { "id": "identity_135253", "image": "DocVQA/images/tgwd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the letter?" }, { "from": "gpt", "value": "The answer is December 20, 1971" } ], "clip_score": 24.9375, "loss": 1.5847978591918945 }, { "id": "identity_143914", "image": "MapQA/images/map_10693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the lowest value range category?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 26.359375, "loss": 4.077949047088623 }, { "id": "identity_91275", "image": "Geometry3K/images/262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle ABC?" }, { "from": "gpt", "value": "The answer is The perimeter is 18 + √164" } ], "clip_score": 33.5625, "loss": 2.7024173736572266 }, { "id": "identity_180609", "image": "IconQA/images/34565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What 3D shape is represented in the image?" }, { "from": "gpt", "value": "The answer is Cube" } ], "clip_score": 34.375, "loss": 3.0003395080566406 }, { "id": "identity_119274", "image": "TextVQA/images/54a6ec25c242fddf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main topic of the poster?" }, { "from": "gpt", "value": "The answer is Anthropology and the study of human remains" } ], "clip_score": 27.34375, "loss": 2.798105001449585 }, { "id": "identity_14914", "image": "VQA-AS/images/4255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the woman wish she had a deer rifle?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 24.140625, "loss": 2.8839335441589355 }, { "id": "identity_118660", "image": "TextVQA/images/60af2970123687cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of points scored by both teams?" }, { "from": "gpt", "value": "The answer is 162 points" } ], "clip_score": 17.1875, "loss": 2.256667137145996 }, { "id": "identity_88759", "image": "GeoQA+/images/96.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is equal to the length of OA, what is the area of circle O?" }, { "from": "gpt", "value": "The answer is 9π" } ], "clip_score": 33.59375, "loss": 2.8292007446289062 }, { "id": "identity_23161", "image": "DocVQA/images/ffbg0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does Clin. PM stand for?" }, { "from": "gpt", "value": "The answer is CLINICAL PREVENTIVE MEDICINE" } ], "clip_score": 16.328125, "loss": 1.212180733680725 }, { "id": "identity_1284", "image": "ScienceQA/images/4456.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the reptile below.\nChoices:\n(A) coral snake\n(B) koala" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.5625, "loss": 3.81421160697937 }, { "id": "identity_27393", "image": "DVQA/images/bar_train_00088906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 19.984375, "loss": 3.8159103393554688 }, { "id": "identity_337514", "image": "PMC-VQA/images/PMC2488350_F4_25949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the three parts of the midgut called?\nChoices:\n(A) Front, Middle, Back \n(B) Anterior, Posterior, Dorsal \n(C) Superior, Inferior, Anterior \n(D) Cranial, Caudal, Medial" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6171875, "loss": 4.115179538726807 }, { "id": "identity_25071", "image": "VQA2.0/images/445761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is on the man's shirt?" }, { "from": "gpt", "value": "The answer is 74" } ], "clip_score": 14.1015625, "loss": 1.954978346824646 }, { "id": "identity_138759", "image": "FigureQA/images/91155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Orange between x-axis labels 0 and 100?" }, { "from": "gpt", "value": "The answer is Approximately 65.5" } ], "clip_score": 24.59375, "loss": 2.2373063564300537 }, { "id": "identity_93440", "image": "TabMWP/images/27163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the largest number of red buttons in a bag?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 26.875, "loss": 2.3896453380584717 }, { "id": "identity_208271", "image": "VQA2.0/images/338637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing in line at the food truck?" }, { "from": "gpt", "value": "The answer is Nine" } ], "clip_score": 31.46875, "loss": 3.0492775440216064 }, { "id": "identity_287399", "image": "IconQA/images/47983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on: green or white?\nChoices:\n(A) green\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 3.7375388145446777 }, { "id": "identity_178935", "image": "TabMWP/images/2661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of boxes sold per student?" }, { "from": "gpt", "value": "The answer is 2.69" } ], "clip_score": 23.765625, "loss": 2.2145817279815674 }, { "id": "identity_97984", "image": "TabMWP/images/11645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the most expensive and least expensive animal?" }, { "from": "gpt", "value": "The answer is $52" } ], "clip_score": 29.75, "loss": 2.7317745685577393 }, { "id": "identity_93752", "image": "TabMWP/images/17854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of science articles read?" }, { "from": "gpt", "value": "The answer is 6.5" } ], "clip_score": 34.5, "loss": 2.183865547180176 }, { "id": "identity_51318", "image": "FigureQA/images/97576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color line has the lowest ending value at x-axis 100?" }, { "from": "gpt", "value": "The answer is Light Seafoam" } ], "clip_score": 25.796875, "loss": 2.5192084312438965 }, { "id": "identity_109682", "image": "Super-CLEVR/images/superCLEVR_new_000006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large vehicle located at the center of the image?" }, { "from": "gpt", "value": "The answer is Purple and green" } ], "clip_score": 24.984375, "loss": 3.374668598175049 }, { "id": "identity_300953", "image": "A-OKVQA/images/581394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the individuals doing near the bus?\nChoices:\n(A) dancing\n(B) cleaning\n(C) sleeping\n(D) packing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.1875, "loss": 3.970566987991333 }, { "id": "identity_29170", "image": "CLEVR-Math/images/CLEVR_train_026027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all gray rubber cylinders. How many cylinders are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.609375, "loss": 2.291867971420288 }, { "id": "identity_93800", "image": "TabMWP/images/30195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many necklaces have more than 30 beads?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 34.375, "loss": 3.034921884536743 }, { "id": "identity_249978", "image": "DVQA/images/bar_train_00139303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the bars in the third group from the bottom?" }, { "from": "gpt", "value": "The answer is 120" } ], "clip_score": 21.84375, "loss": 2.048654556274414 }, { "id": "identity_202621", "image": "A-OKVQA/images/542452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the 'No Parking' restriction end?" }, { "from": "gpt", "value": "The answer is 6 PM" } ], "clip_score": 13.4453125, "loss": 2.2099833488464355 }, { "id": "identity_32736", "image": "VQA2.0/images/136886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is winning?" }, { "from": "gpt", "value": "The answer is dark shirts" } ], "clip_score": 16.34375, "loss": 3.230957508087158 }, { "id": "identity_141942", "image": "DVQA/images/bar_train_00138423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the 'appeal' category consistently present in all groups?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.90625, "loss": 3.282998561859131 }, { "id": "identity_6251", "image": "VQA-AS/images/5681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many small squirrels are on the ground?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.578125, "loss": 2.273887872695923 }, { "id": "identity_294552", "image": "TabMWP/images/36045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the data collected in the efficiency improvement program, what was the difference in the average food preparation time at the restaurant between Tuesday and Wednesday?" }, { "from": "gpt", "value": "The answer is -37" } ], "clip_score": 34.21875, "loss": 3.53505277633667 }, { "id": "identity_82294", "image": "GeoQA+/images/5699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is 5 and AB is a chord of length 8, what is the area of the minor segment cut off by chord AB?" }, { "from": "gpt", "value": "The answer is (1/2) * π * 25 - 12 ≈ 27.1 square units (rounded to the nearest tenth)" } ], "clip_score": 34.8125, "loss": 1.8408716917037964 }, { "id": "identity_178809", "image": "CLEVR-Math/images/CLEVR_train_024469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shiny objects are there?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.859375, "loss": 2.371905565261841 }, { "id": "identity_169802", "image": "TabMWP/images/10022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of contestants with a red beard to those with a grey beard in the mustache competition?" }, { "from": "gpt", "value": "The answer is 9:2" } ], "clip_score": 16.640625, "loss": 1.5333950519561768 }, { "id": "identity_225026", "image": "GeoQA+/images/1031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of AB is 8 cm and the length of AD is 6 cm, what is the length of segment GF after folding the rectangular strip ABCD along EF, given that ∠BGE = 130°?" }, { "from": "gpt", "value": "The answer is 5 cm" } ], "clip_score": 34.78125, "loss": 2.0671591758728027 }, { "id": "identity_132442", "image": "A-OKVQA/images/469282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of water body are the boats floating on?" }, { "from": "gpt", "value": "The answer is Sea" } ], "clip_score": 23.09375, "loss": 3.188082695007324 }, { "id": "identity_268611", "image": "TextVQA/images/fbcf0348c9f55b3d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ranking of the player who is in the first position?" }, { "from": "gpt", "value": "The answer is delivery network " } ], "clip_score": 26.96875, "loss": 6.730310916900635 }, { "id": "identity_117031", "image": "VQA2.0/images/233366.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit is directly under the pineapple?" }, { "from": "gpt", "value": "The answer is Apple" } ], "clip_score": 24.46875, "loss": 3.711686611175537 }, { "id": "identity_29233", "image": "A-OKVQA/images/327713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of station is in this area?\nChoices:\n(A) bus\n(B) gas\n(C) train\n(D) fire" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.5546875, "loss": 4.082946300506592 }, { "id": "identity_134238", "image": "DVQA/images/bar_train_00080590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have a 'fight' value greater than 5?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.96875, "loss": 2.5142340660095215 }, { "id": "identity_130282", "image": "A-OKVQA/images/439837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the plates stacked on the counter?" }, { "from": "gpt", "value": "The answer is Blue floral pattern" } ], "clip_score": 22.234375, "loss": 2.9825615882873535 }, { "id": "identity_184998", "image": "A-OKVQA/images/174896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bottle cap next to the bread?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 13.953125, "loss": 4.181703090667725 }, { "id": "identity_146587", "image": "DVQA/images/bar_train_00022627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the smallest difference in accuracy between the two algorithms?" }, { "from": "gpt", "value": "The answer is soil and jungle (tie)" } ], "clip_score": 33.25, "loss": 2.526367425918579 }, { "id": "identity_76229", "image": "AI2D/images/3368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pistils are present in the Magnolia flower as shown in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 30.75, "loss": 2.550262212753296 }, { "id": "identity_195524", "image": "A-OKVQA/images/449798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the mousepad?" }, { "from": "gpt", "value": "The answer is Multicolored" } ], "clip_score": 20.375, "loss": 3.0629520416259766 }, { "id": "identity_177492", "image": "TabMWP/images/19502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the price of light brown sugar increases by 10%, what will be the new price per kilogram?" }, { "from": "gpt", "value": "The answer is $1.749 per kg" } ], "clip_score": 34.65625, "loss": 2.3947713375091553 }, { "id": "identity_78597", "image": "TQA/images/abc_question_images/blastocyst_19029.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the fluid-filled cavity within the blastocyst?" }, { "from": "gpt", "value": "The answer is Label T" } ], "clip_score": 28.015625, "loss": 3.5493037700653076 }, { "id": "identity_85681", "image": "UniGeo/images/1427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠QPR?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 32.84375, "loss": 2.174729585647583 }, { "id": "identity_14502", "image": "TextVQA/images/1ea65daf0d469bfa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who are the people that get it?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 14.6875, "loss": 4.975092887878418 }, { "id": "identity_61202", "image": "DocVQA/images/grvv0228_24.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage increase in salary for A. E. Schaefer from 1979 to 1980?" }, { "from": "gpt", "value": "The answer is 8.37%" } ], "clip_score": 28.640625, "loss": 1.955401062965393 }, { "id": "identity_4473", "image": "PMC-VQA/images/PMC5799328_Fig4_271947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the labels I-III correspond to in the figures? \nChoices:\n(A) different types of imaging techniques \n(B) different types of stains \n(C) different positions along the aorta \n(D) different time points of the imaging process " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.109375, "loss": 4.22182559967041 }, { "id": "identity_167776", "image": "TabMWP/images/32326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of athletes from all the countries combined?" }, { "from": "gpt", "value": "The answer is 469" } ], "clip_score": 28.5, "loss": 2.402522087097168 }, { "id": "identity_225720", "image": "TQA/images/question_images/cycle_carbon_5026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the complete process of how nitrogen from the atmosphere is converted into a form usable by plants and then returned back to the atmosphere" }, { "from": "gpt", "value": "The answer is Nitrogen is fixed, assimilated, decomposed, and denitrified back to N₂" } ], "clip_score": 30.25, "loss": 2.2228691577911377 }, { "id": "identity_280394", "image": "UniGeo/images/562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, a cone is formed by surrounding a circle with sector paper with a radius of 10 and a central angle of 144° (ignoring the seam). What is the radius of the base of this cone?\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.25, "loss": 2.3790032863616943 }, { "id": "identity_175552", "image": "TabMWP/images/32020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day were the least number of pumpkins sold?" }, { "from": "gpt", "value": "The answer is Friday and Monday" } ], "clip_score": 37.8125, "loss": 3.558673620223999 }, { "id": "identity_327803", "image": "TabMWP/images/21570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Aiden tracked his coffee intake and sleep duration for several weeks. He noted the number of cups of coffee he drank in the morning and the hours he slept that night. What is the chance that a randomly chosen day is one where he slept for 7 hours and drank 1 cup of coffee? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 1/5" } ], "clip_score": 33.0, "loss": 1.8269400596618652 }, { "id": "identity_55864", "image": "DocVQA/images/pnbx0223_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of members present per meeting?" }, { "from": "gpt", "value": "The answer is 2.75" } ], "clip_score": 19.078125, "loss": 2.024867296218872 }, { "id": "identity_49452", "image": "MapQA/images/map_10373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest percentage range in the Midwest region?" }, { "from": "gpt", "value": "The answer is Ohio" } ], "clip_score": 28.75, "loss": 2.951643705368042 }, { "id": "identity_145229", "image": "DocVQA/images/ljgf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the letter?" }, { "from": "gpt", "value": "The answer is May 15, 1961" } ], "clip_score": 18.9375, "loss": 2.0269293785095215 }, { "id": "identity_231412", "image": "IconQA/images/52103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more yellow planes are there than red planes?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 27.078125, "loss": 2.2029688358306885 }, { "id": "identity_137416", "image": "FigureQA/images/77841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Violet on the y-axis label?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 29.296875, "loss": 3.478090286254883 }, { "id": "identity_226894", "image": "TextVQA/images/689df70804031e18.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the tweet mentioning Ken Jennings and Watson, and what is the date of the news article about Ellen Steinberg?" }, { "from": "gpt", "value": "The answer is February 27, 2011 and February 24, 2011" } ], "clip_score": 18.84375, "loss": 1.3242791891098022 }, { "id": "identity_140557", "image": "DocVQA/images/frld0217_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main strategy described in the Neurontin 1998 Tactical Plan?" }, { "from": "gpt", "value": "The answer is TMs to identify and work with high prescribing physicians in developing a real case history where Neurontin was an integral part of successful seizure control" } ], "clip_score": 29.640625, "loss": 2.920046806335449 }, { "id": "identity_7108", "image": "ScienceQA/images/5658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these cities is marked on the map?\nChoices:\n(A) San Antonio\n(B) Charlotte\n(C) Nashville\n(D) Oklahoma City" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.078125, "loss": 3.5689773559570312 }, { "id": "identity_40786", "image": "FigureQA/images/25074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the color Sienna?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 19.109375, "loss": 2.780278205871582 }, { "id": "identity_5035", "image": "TQA/images/question_images/parts_worm_7315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the end of the flatworm called?\nChoices:\n(A) male pore\n(B) prostomium\n(C) genital markings\n(D) clitellum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.46875, "loss": 4.719771385192871 }, { "id": "identity_142768", "image": "DVQA/images/bar_train_00140823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest value for the 'grace' category?" }, { "from": "gpt", "value": "The answer is mature" } ], "clip_score": 21.078125, "loss": 3.9179160594940186 }, { "id": "identity_253157", "image": "MapQA/images/map_2051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border Washington, which state has the highest percentage rate, and what is that rate?" }, { "from": "gpt", "value": "The answer is Oregon, 45.3%-47.5%" } ], "clip_score": 35.15625, "loss": 2.347665786743164 }, { "id": "identity_26308", "image": "A-OKVQA/images/157635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the TV?\nChoices:\n(A) sporting event\n(B) judge show\n(C) cartoons\n(D) baking show" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.953125, "loss": 4.253356456756592 }, { "id": "identity_282067", "image": "DocVQA/images/tqjp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is referenced in this letter?" }, { "from": "gpt", "value": "The answer is JANUARY 3RD, 1980" } ], "clip_score": 27.328125, "loss": 1.5410176515579224 }, { "id": "identity_193966", "image": "PMC-VQA/images/PMC5763060_fig3_262113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique is used to capture this image?" }, { "from": "gpt", "value": "The answer is X-ray" } ], "clip_score": 31.59375, "loss": 1.9494353532791138 }, { "id": "identity_9790", "image": "GeoQA+/images/1121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,PA,PB分别是⊙O的切线,A,B为切点,AC是⊙O的直径,已知∠BAC=35°,则∠P的度数是()\nChoices:\n(A) 50°\n(B) 60°\n(C) 70°\n(D) 80°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.9375, "loss": 2.647679090499878 }, { "id": "identity_144765", "image": "DocVQA/images/qllg0023_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the handwritten response to question number 4 on the questionnaire?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.453125, "loss": 3.173062801361084 }, { "id": "identity_164871", "image": "GeoQA+/images/1450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle ABD in the quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is The area of triangle ABD is calculated using the length of AD and height from B" } ], "clip_score": 34.4375, "loss": 2.3237147331237793 }, { "id": "identity_11891", "image": "DVQA/images/bar_train_00120112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the smallest individual bar in the whole chart?" }, { "from": "gpt", "value": "The answer is -6" } ], "clip_score": 26.6875, "loss": 2.8665831089019775 }, { "id": "identity_19919", "image": "DocVQA/images/lybx0223_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the date mentioned ?" }, { "from": "gpt", "value": "The answer is May 29, 2012" } ], "clip_score": 15.75, "loss": 1.6737391948699951 }, { "id": "identity_320350", "image": "PMC-VQA/images/PMC7878549_F2_366617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the condition indicated by the white arrows in this image?\nChoices:\n(A) Perilesional edema \n(B) Infarcted tissue \n(C) Brain metastasis \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.921875, "loss": 4.8616437911987305 }, { "id": "identity_120912", "image": "PMC-VQA/images/PMC5415772_Fig1_160019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of contrast enhancement in the lesion?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.1875, "loss": 2.773482084274292 }, { "id": "identity_190570", "image": "PMC-VQA/images/PMC3522500_fig2_173028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of pleural effusion in the image?" }, { "from": "gpt", "value": "The answer is No evidence of pleural effusion" } ], "clip_score": 25.921875, "loss": 2.696838140487671 }, { "id": "identity_167955", "image": "TabMWP/images/26027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more baseballs did the player hit on Thursday compared to Monday?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 36.78125, "loss": 2.6196863651275635 }, { "id": "identity_208143", "image": "PMC-VQA/images/PMC6548024_fig5_488939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the color scale at the top indicate?" }, { "from": "gpt", "value": "The answer is Area extension percentage" } ], "clip_score": 19.078125, "loss": 3.1173794269561768 }, { "id": "identity_329018", "image": "PMC-VQA/images/PMC3311096_F6_131324.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy is used to capture this image?\nChoices:\n(A) SEM,\n(B) AFM,\n(C) TEM,\n(D) STM." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.25, "loss": 3.8955864906311035 }, { "id": "identity_231857", "image": "Super-CLEVR/images/superCLEVR_new_000965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the size of the blue mountain bike on the left side of the blue rubber car to the size of the car on the left side of the small metallic bus?" }, { "from": "gpt", "value": "The answer is approximately 0.5" } ], "clip_score": 25.75, "loss": 2.211843729019165 }, { "id": "identity_282439", "image": "PMC-VQA/images/PMC7853585_F1_358067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of cell is depicted in the image provided? \nOptions:\n(A) HepG2\n(B) HeLa\n(C) SMMC-7721\n(D) A549" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.65625, "loss": 4.103673934936523 }, { "id": "identity_196826", "image": "PMC-VQA/images/PMC6221646_F1_392942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of rib fractures?" }, { "from": "gpt", "value": "The answer is No visible rib fractures" } ], "clip_score": 26.25, "loss": 2.9746623039245605 }, { "id": "identity_152238", "image": "ScienceQA/images/1058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What body of water is located to the west of the highlighted country?" }, { "from": "gpt", "value": "The answer is Gulf of Gonâve" } ], "clip_score": 25.21875, "loss": 2.7315335273742676 }, { "id": "identity_74139", "image": "ScienceQA/images/15990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries border the highlighted country?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.125, "loss": 2.377265691757202 }, { "id": "identity_224849", "image": "AI2D/images/1912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the complete energy flow pathway from plants to birds in the soil food web diagram" }, { "from": "gpt", "value": "The answer is Plants → Nematodes → Arthropods → Arthropods → Birds" } ], "clip_score": 28.796875, "loss": 1.7486058473587036 }, { "id": "identity_335927", "image": "TabMWP/images/32761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Addison has $0.11. Does she have enough to buy these two items: an orange marker and a mechanical pencil?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.75, "loss": 3.2472164630889893 }, { "id": "identity_154883", "image": "ScienceQA/images/6300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface texture does the basketball have?" }, { "from": "gpt", "value": "The answer is Dimpled texture" } ], "clip_score": 30.046875, "loss": 3.351463556289673 }, { "id": "identity_337409", "image": "DocVQA/images/gfmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the emp. no. of Herman W Arnett Jr?" }, { "from": "gpt", "value": "The answer is 052008" } ], "clip_score": 21.5625, "loss": 2.501728057861328 }, { "id": "identity_225918", "image": "A-OKVQA/images/107960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image, and how does it connect to the ground?" }, { "from": "gpt", "value": "The answer is Motorcycle, wheels" } ], "clip_score": 24.0625, "loss": 4.08880090713501 }, { "id": "identity_79903", "image": "UniGeo/images/1170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if AB is the diameter and ∠ADC = 65°, what is the measure of ∠CDB?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 38.15625, "loss": 1.695321798324585 }, { "id": "identity_179129", "image": "TabMWP/images/2634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of bricks per building?" }, { "from": "gpt", "value": "The answer is 56" } ], "clip_score": 25.078125, "loss": 2.873931646347046 }, { "id": "identity_209594", "image": "VQA2.0/images/150164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Rainy" } ], "clip_score": 25.578125, "loss": 2.4615719318389893 }, { "id": "identity_1684", "image": "FigureQA/images/20186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Aqua greater than Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.036404132843018 }, { "id": "identity_113595", "image": "PMC-VQA/images/PMC7899876_F1_372376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any lesions or abnormalities present in the retina?" }, { "from": "gpt", "value": "The answer is Yes, there are yellowish lesions" } ], "clip_score": 33.90625, "loss": 2.776743173599243 }, { "id": "identity_224509", "image": "TQA/images/abc_question_images/nuclear_energy_17095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the component that connects the secondary loop to the heat exchanger and describe its function" }, { "from": "gpt", "value": "The answer is H" } ], "clip_score": 27.921875, "loss": 4.268886566162109 }, { "id": "identity_22992", "image": "PMC-VQA/images/PMC4643987_pone.0142956.g008_443715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is evident in the anterior region? \nChoices:\n(A) Dopamine-IR \n(B) Acetylcholine-IR \n(C) GPR axon bundle \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.796875, "loss": 3.7589564323425293 }, { "id": "identity_313138", "image": "TabMWP/images/23973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a superhero chosen at random from a list compiled by a creative writing class, which includes each superhero's superpower and personality flaw, can read minds and is lazy? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 1/4" } ], "clip_score": 19.578125, "loss": 1.6203302145004272 }, { "id": "identity_198094", "image": "PMC-VQA/images/PMC4454843_F3_393174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might the different colors in the image represent?" }, { "from": "gpt", "value": "The answer is Levels of brain activity or functions" } ], "clip_score": 26.5625, "loss": 3.4234251976013184 }, { "id": "identity_226094", "image": "IconQA/images/24010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Shane's mom reads for 45 minutes before work and then takes 30 minutes to get ready, what time does she leave for work?" }, { "from": "gpt", "value": "The answer is 8:15 A.M" } ], "clip_score": 23.921875, "loss": 1.8211170434951782 }, { "id": "identity_88471", "image": "GEOS/images/064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the circle O if the length of arc AB is 4*pi and the measure of angle AOB is 40 degrees?" }, { "from": "gpt", "value": "The answer is 81*pi" } ], "clip_score": 30.015625, "loss": 2.6444578170776367 }, { "id": "identity_149857", "image": "DocVQA/images/plkp0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number of Heights Travel Service Inc.?" }, { "from": "gpt", "value": "The answer is 212-522-1400" } ], "clip_score": 19.265625, "loss": 1.8211662769317627 }, { "id": "identity_143002", "image": "DocVQA/images/skxv0001_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fax number of the recipient?" }, { "from": "gpt", "value": "The answer is 7598" } ], "clip_score": 21.09375, "loss": 2.6519734859466553 }, { "id": "identity_167532", "image": "IconQA/images/66217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the triangles identical in size and shape?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.109375, "loss": 3.124878406524658 }, { "id": "identity_39790", "image": "DVQA/images/bar_train_00079667.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold per store for the item \"tail\"?" }, { "from": "gpt", "value": "The answer is 12.5" } ], "clip_score": 29.421875, "loss": 1.8535997867584229 }, { "id": "identity_164699", "image": "Geometry3K/images/1168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of all the angles in the diagram?" }, { "from": "gpt", "value": "The answer is 630°" } ], "clip_score": 31.890625, "loss": 2.348944664001465 }, { "id": "identity_304731", "image": "ScienceQA/images/8251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the scientific name of this creature?\nChoices:\n(A) Pterodactylus antiquus\n(B) Vectidraco daisymorrisae" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.4375, "loss": 3.9054489135742188 }, { "id": "identity_113965", "image": "VQA-AS/images/4226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object the child on the swing is holding?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.265625, "loss": 3.1312432289123535 }, { "id": "identity_307432", "image": "ChartQA/images/4215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of the largest bar 41?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.1796875, "loss": 3.541344404220581 }, { "id": "identity_66107", "image": "ScienceQA/images/99.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance between the two magnets in Pair 2?" }, { "from": "gpt", "value": "The answer is 2 inches" } ], "clip_score": 26.671875, "loss": 1.6002206802368164 }, { "id": "identity_249804", "image": "DVQA/images/bar_train_00192453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of the values for the stack with the highest individual element value?" }, { "from": "gpt", "value": "The answer is 17.5" } ], "clip_score": 22.84375, "loss": 1.6999846696853638 }, { "id": "identity_116218", "image": "Super-CLEVR/images/superCLEVR_new_001341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the green airplane relative to the purple motorcycle?" }, { "from": "gpt", "value": "The answer is To the right and behind" } ], "clip_score": 22.078125, "loss": 3.3714630603790283 }, { "id": "identity_14324", "image": "A-OKVQA/images/300499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the woman carrying her bag in the rain?\nChoices:\n(A) back\n(B) basket\n(C) shoulder\n(D) seat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.296875, "loss": 4.627816200256348 }, { "id": "identity_298325", "image": "A-OKVQA/images/398301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this event category? \nOptions:\n(A) farmers market\n(B) fair\n(C) flea market\n(D) garage sale" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.4375, "loss": 3.9508395195007324 }, { "id": "identity_92542", "image": "TabMWP/images/14765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the broken plates per shipment?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 18.546875, "loss": 3.1539764404296875 }, { "id": "identity_136149", "image": "MapQA/images/map_5338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest number of neighbors according to the color legend?" }, { "from": "gpt", "value": "The answer is Alaska" } ], "clip_score": 32.6875, "loss": 2.3604743480682373 }, { "id": "identity_269255", "image": "TQA/images/question_images/cycle_rock_6724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of rocks are located beneath the sedimentary rocks?\nChoices:\n(A) Sediments\n(B) Metamorphic rocks\n(C) Solidification\n(D) Igneous rocks" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.046875, "loss": 4.574548244476318 }, { "id": "identity_331796", "image": "Super-CLEVR/images/superCLEVR_new_001306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many other items are there of the same color as the small metallic jet?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 21.140625, "loss": 2.4924588203430176 }, { "id": "identity_287501", "image": "PMC-VQA/images/PMC7564854_F0001_275742.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which part of the clavicle is depicted?\nChoices:\n(A) Medial end\n(B) Lateral end\n(C) Middle of the clavicle\n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.921875, "loss": 4.218317985534668 }, { "id": "identity_78052", "image": "AI2D/images/4169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process leads to the formation of sedimentary rocks as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Compaction/Cementation" } ], "clip_score": 24.703125, "loss": 1.9345524311065674 }, { "id": "identity_71998", "image": "ScienceQA/images/20468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.59375, "loss": 2.3271522521972656 }, { "id": "identity_106459", "image": "TextVQA/images/408f503828d50f36.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the wall to the left of the red cabinets?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.28125, "loss": 4.958982944488525 }, { "id": "identity_41900", "image": "PlotQA/images/109139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of female employers in Germany?" }, { "from": "gpt", "value": "The answer is 2%" } ], "clip_score": 31.109375, "loss": 2.2619082927703857 }, { "id": "identity_166697", "image": "TabMWP/images/29943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Hakim wants to limit his spending to $3 per day, on which days did he exceed his budget?" }, { "from": "gpt", "value": "The answer is Monday and Tuesday" } ], "clip_score": 35.9375, "loss": 1.7152894735336304 }, { "id": "identity_247000", "image": "TabMWP/images/19994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total surplus or shortage if the price changes from $725 to $925" }, { "from": "gpt", "value": "The answer is 18,500" } ], "clip_score": 23.53125, "loss": 1.9451496601104736 }, { "id": "identity_47700", "image": "DVQA/images/bar_train_00138425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the lowest accuracy on the 'rice' dataset?" }, { "from": "gpt", "value": "The answer is play" } ], "clip_score": 30.53125, "loss": 3.339257001876831 }, { "id": "identity_100473", "image": "CLEVR-Math/images/CLEVR_train_015248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the yellow cylinder but smaller than the red cylinder?" }, { "from": "gpt", "value": "The answer is 2 objects" } ], "clip_score": 30.765625, "loss": 2.9641973972320557 }, { "id": "identity_240666", "image": "DocVQA/images/zhyc0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average body weight for males aged 15-19 and females aged 45-64 in Santiago and Calama, and how does it compare to the national average for these age groups?" }, { "from": "gpt", "value": "The answer is Males 15-19: 6.0, 10.8; Females 45-64: 0.8, 8.6 pounds below" } ], "clip_score": 20.28125, "loss": 1.8175883293151855 }, { "id": "identity_199734", "image": "PMC-VQA/images/PMC6704110_Fig2_26727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label in the top left corner of the image?" }, { "from": "gpt", "value": "The answer is b2" } ], "clip_score": 19.25, "loss": 2.855623245239258 }, { "id": "identity_89232", "image": "UniGeo/images/10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of side AC is increased by 3cm, what will be the new perimeter of triangle ABC?" }, { "from": "gpt", "value": "The answer is 31cm" } ], "clip_score": 32.6875, "loss": 2.379504919052124 }, { "id": "identity_328582", "image": "VQA-AS/images/5188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the rug?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 24.84375, "loss": 3.769458293914795 }, { "id": "identity_146930", "image": "FigureQA/images/59089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Tan have a higher value than Dark Violet?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.390625, "loss": 3.1843619346618652 }, { "id": "identity_310970", "image": "DVQA/images/bar_train_00126952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have an accuracy greater than 9 in at least one dataset?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 32.34375, "loss": 3.6768250465393066 }, { "id": "identity_240854", "image": "TabMWP/images/26324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of pages written per day over the past 7 days, and how does this compare to the median number of pages written per day?" }, { "from": "gpt", "value": "The answer is Average is 78.14, Median is 78" } ], "clip_score": 34.78125, "loss": 2.1020870208740234 }, { "id": "identity_103736", "image": "TextVQA/images/9aae5672d3f4f1c7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural feature is prominently shown in the illustration?" }, { "from": "gpt", "value": "The answer is Arches" } ], "clip_score": 15.421875, "loss": 3.892987012863159 }, { "id": "identity_270083", "image": "DocVQA/images/qzlf0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the recipient of this letter?" }, { "from": "gpt", "value": "The answer is Dr. Lutz Mueller" } ], "clip_score": 25.484375, "loss": 3.1478796005249023 }, { "id": "identity_80881", "image": "GeoQA+/images/3579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given the circle O with radius 5 and point A at a distance of 3 from the center O, what is the length of the diameter of the circle?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 32.875, "loss": 1.9415092468261719 }, { "id": "identity_201865", "image": "VQA2.0/images/33943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the countertop?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.59375, "loss": 3.3389110565185547 }, { "id": "identity_166582", "image": "TabMWP/images/35249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 5 more parakeets are added to 1 tree, how many parakeets will be in that tree?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 28.109375, "loss": 2.457916498184204 }, { "id": "identity_192166", "image": "PMC-VQA/images/PMC4366540_Fig4_369718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of brain atrophy in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.21875, "loss": 2.9935595989227295 }, { "id": "identity_325857", "image": "UniGeo/images/57.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知AB=AC=AD,∠CBD=2∠BDC,∠BAC=44°,则∠CAD的度数是多少?\n\nChoices:\n(A) 68°\n(B) 88°\n(C) 90°\n(D) 112°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.90625, "loss": 2.3662242889404297 }, { "id": "identity_127451", "image": "VQA2.0/images/288609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 15.03125, "loss": 4.291834831237793 }, { "id": "identity_79468", "image": "UniGeo/images/3294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle O, if ∠OBC = 70° and triangle ABC is inscribed in the circle, what is the area of sector BOC if the radius of the circle is 10 units?" }, { "from": "gpt", "value": "The answer is approximately 34.91 square units" } ], "clip_score": 34.53125, "loss": 2.027567148208618 }, { "id": "identity_120257", "image": "VQA2.0/images/98233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the flag on the left side of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 18.578125, "loss": 3.509944438934326 }, { "id": "identity_145865", "image": "DocVQA/images/srgj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the BSE scrip code for Golden Tobacco Ltd.?" }, { "from": "gpt", "value": "The answer is 500151" } ], "clip_score": 21.390625, "loss": 2.0040338039398193 }, { "id": "identity_8152", "image": "DocVQA/images/sllk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What differences in dental caries experience were clearly evident?" }, { "from": "gpt", "value": "The answer is racial differences" } ], "clip_score": 19.296875, "loss": 3.3729758262634277 }, { "id": "identity_313569", "image": "VQA2.0/images/94455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in this photo?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.765625, "loss": 3.5260398387908936 }, { "id": "identity_283969", "image": "VizWiz/images/VizWiz_train_00011598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the lake in the Lakewood area of Washington state where I am currently standing?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 31.84375, "loss": 3.865537643432617 }, { "id": "identity_127577", "image": "PMC-VQA/images/PMC4074584_F4_302021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the structure in figure (a) completely solid?" }, { "from": "gpt", "value": "The answer is No, it is not completely solid" } ], "clip_score": 12.3046875, "loss": 2.446455955505371 }, { "id": "identity_141300", "image": "PlotQA/images/76488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are represented in this graph?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.40625, "loss": 2.6657397747039795 }, { "id": "identity_39733", "image": "DVQA/images/bar_train_00012391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of wise in block?" }, { "from": "gpt", "value": "The answer is 10^4" } ], "clip_score": 18.390625, "loss": 2.748185157775879 }, { "id": "identity_45287", "image": "ChartQA/images/multi_col_45.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of female Facebook users in the age group with the lowest male percentage?" }, { "from": "gpt", "value": "The answer is 2.3%" } ], "clip_score": 33.15625, "loss": 1.8172354698181152 }, { "id": "identity_330905", "image": "VizWiz/images/VizWiz_train_00004745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of container is this? The brand." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 23.21875, "loss": 3.973588705062866 }, { "id": "identity_10029", "image": "Geometry3K/images/289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the parallelogram. Round to the nearest tenth if necessary.\nChoices:\n(A) 332.3\n(B) 396\n(C) 471.9\n(D) 616.1" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.375, "loss": 2.6388156414031982 }, { "id": "identity_302920", "image": "TextVQA/images/4d9282b72f83bce1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are we?" }, { "from": "gpt", "value": "The answer is gullfoss" } ], "clip_score": 31.46875, "loss": 2.1580617427825928 }, { "id": "identity_131482", "image": "VQA2.0/images/303650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is the game being played?" }, { "from": "gpt", "value": "The answer is Evening" } ], "clip_score": 17.84375, "loss": 3.3963890075683594 }, { "id": "identity_191749", "image": "PMC-VQA/images/PMC7681181_Fig14_302897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to capture this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.4375, "loss": 2.6869537830352783 }, { "id": "identity_211548", "image": "VQA2.0/images/114473.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cart being pulled by the horses?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 30.40625, "loss": 3.0776801109313965 }, { "id": "identity_241175", "image": "CLEVR-Math/images/CLEVR_train_025511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove all the spheres and cubes, how many shapes are left?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 27.984375, "loss": 2.183098793029785 }, { "id": "identity_42904", "image": "DVQA/images/bar_train_00153866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'lip' and 'ball' algorithms on the 'wisdom' dataset?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 32.9375, "loss": 3.033188581466675 }, { "id": "identity_235410", "image": "UniGeo/images/1811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,AB=AD=DC,若∠BAD=36°,求∠B和∠C的大小并验证∠B+∠C是否等于108°。" }, { "from": "gpt", "value": "The answer is ∠B = 72°, ∠C = 72°, and ∠B + ∠C ≠ 108°" } ], "clip_score": 31.59375, "loss": 0.7260543704032898 }, { "id": "identity_212016", "image": "A-OKVQA/images/237464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest umbrella in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.96875, "loss": 2.97499680519104 }, { "id": "identity_60396", "image": "DocVQA/images/psnm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the moderator for the panel discussion?" }, { "from": "gpt", "value": "The answer is P. Rambeau" } ], "clip_score": 17.71875, "loss": 3.09232759475708 }, { "id": "identity_36098", "image": "TabMWP/images/18207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The town council reported on how many stop signs there are on each street. What is the range of the numbers?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 38.71875, "loss": 3.109753370285034 }, { "id": "identity_30576", "image": "VQA2.0/images/81585.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these motorcycles or scooters?" }, { "from": "gpt", "value": "The answer is motorcycles" } ], "clip_score": 23.71875, "loss": 1.952047348022461 }, { "id": "identity_306030", "image": "GeoQA+/images/3952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在矩形ABCD中,AB=4,BC=2,P是矩形上方的一个点.且满足∠APB=90°,连接DP,则DP的最大值是()\n选择:\n(A) 2√{2}+2\n(B) 4√{2}\n(C) 2√{5}\n(D) 4√{2}+2" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.984375, "loss": 2.5094962120056152 }, { "id": "identity_1337", "image": "IconQA/images/23980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a sun. Which picture is seventh?\nChoices:\n(A) barn\n(B) tractor\n(C) sun" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.28125, "loss": 3.9756019115448 }, { "id": "identity_151130", "image": "ScienceQA/images/17018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the southeast of the highlighted state?" }, { "from": "gpt", "value": "The answer is Gulf of Mexico" } ], "clip_score": 28.09375, "loss": 2.1893258094787598 }, { "id": "identity_168517", "image": "TabMWP/images/32796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which park has the least number of Falcon sightings?" }, { "from": "gpt", "value": "The answer is Hillview Park" } ], "clip_score": 35.03125, "loss": 2.1645543575286865 }, { "id": "identity_245439", "image": "PMC-VQA/images/PMC5986059_ccr31539-fig-0001_323804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the histological characteristics of hyperkeratotic nodules and papules found on the toes, and how do they differ from those found on other parts of the foot?" }, { "from": "gpt", "value": "The answer is Thickened stratum corneum, acanthosis, and inflammatory cells" } ], "clip_score": 28.890625, "loss": 2.2713260650634766 }, { "id": "identity_99694", "image": "IconQA/images/89334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many erasers long is the colored pencil?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 23.578125, "loss": 2.524794578552246 }, { "id": "identity_224280", "image": "TabMWP/images/38154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total change in water level over the two months, and what is the average monthly change in water level?" }, { "from": "gpt", "value": "The answer is Total change is -2 inches, average change is -1 inch" } ], "clip_score": 40.78125, "loss": 2.306454658508301 }, { "id": "identity_299096", "image": "PMC-VQA/images/PMC9327211_Fig1_356939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to detect the hyper-metabolic lymph nodes in the patient? Was it:\n(A) MRI\n(B) PET\n(C) X-ray\n(D) CT" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.609375, "loss": 3.8897604942321777 }, { "id": "identity_21707", "image": "DocVQA/images/gqxw0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Date & time mentioned in the report?" }, { "from": "gpt", "value": "The answer is SEP 17'97 14:48" } ], "clip_score": 25.640625, "loss": 2.498385429382324 }, { "id": "identity_265501", "image": "DocVQA/images/grnk0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the content of the document?" }, { "from": "gpt", "value": "The answer is dtc brand status agenda" } ], "clip_score": 22.625, "loss": 2.891345262527466 }, { "id": "identity_224984", "image": "DocVQA/images/lgcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration in seconds for which the transmission lasted?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 19.625, "loss": 3.0247802734375 }, { "id": "identity_121617", "image": "A-OKVQA/images/464178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fence surrounding the giraffe enclosure?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 33.59375, "loss": 3.1054961681365967 }, { "id": "identity_99975", "image": "TabMWP/images/37893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in cost between a piece of elegant stationery and an orange envelope?" }, { "from": "gpt", "value": "The answer is $0.06" } ], "clip_score": 34.15625, "loss": 1.9003925323486328 }, { "id": "identity_252817", "image": "ScienceQA/images/5822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony is located directly to the west of the colony shown and what is its capital city?" }, { "from": "gpt", "value": "The answer is Connecticut, Hartford" } ], "clip_score": 27.46875, "loss": 2.8555870056152344 }, { "id": "identity_192897", "image": "PMC-VQA/images/PMC5946363_F1_313143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical orientation is indicated by the letters R, A, S, and L in the image?" }, { "from": "gpt", "value": "The answer is Right, Anterior, Superior, Left" } ], "clip_score": 27.65625, "loss": 1.5910353660583496 }, { "id": "identity_140757", "image": "DVQA/images/bar_train_00022041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the algorithm tested in the garage and the one tested in the branch?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 37.6875, "loss": 3.248918294906616 }, { "id": "identity_328874", "image": "GeoQA+/images/5281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given right triangle, ∠C=90°, AB=4, and BC=1. What is the value of cosA?\n\nChoices:\n(A) \\(\\frac{√{15}}{4}\\)\n(B) \\(\\frac{1}{4}\\)\n(C) √{15}\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.453125, "loss": 2.419951915740967 }, { "id": "identity_131618", "image": "TextVQA/images/b45d1f14b1d7c6f1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are these individuals participating in?" }, { "from": "gpt", "value": "The answer is Running event or race" } ], "clip_score": 21.1875, "loss": 3.4483070373535156 }, { "id": "identity_49152", "image": "DocVQA/images/jrgv0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is the 'Genetics Into Focus' discussion scheduled to start?" }, { "from": "gpt", "value": "The answer is 2.30 pm" } ], "clip_score": 24.890625, "loss": 1.8064630031585693 }, { "id": "identity_964", "image": "TQA/images/question_images/cycle_nitrogen_6712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many steps there are in the Nitrogen cycle?\nChoices:\n(A) 3\n(B) 6\n(C) 7\n(D) 5" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.078125, "loss": 2.525758743286133 }, { "id": "identity_301665", "image": "VQA2.0/images/417204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is on this?" }, { "from": "gpt", "value": "The answer is 25686" } ], "clip_score": 15.2890625, "loss": 1.3228613138198853 }, { "id": "identity_308978", "image": "DVQA/images/bar_train_00125430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars?" }, { "from": "gpt", "value": "The answer is animal" } ], "clip_score": 22.71875, "loss": 4.2169976234436035 }, { "id": "identity_176499", "image": "IconQA/images/16562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the flowers are in the top row?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 27.28125, "loss": 1.5629585981369019 }, { "id": "identity_136502", "image": "DVQA/images/bar_train_00080429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the 'sewage' dataset for 'muscle'?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 27.78125, "loss": 2.3969521522521973 }, { "id": "identity_226458", "image": "TextVQA/images/3d61184009535bb5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What features does this toy offer to protect the device and enhance the learning experience for children aged 6+ months?" }, { "from": "gpt", "value": "The answer is Clear film, blocks home button, free learning apps" } ], "clip_score": 26.109375, "loss": 3.942197322845459 }, { "id": "identity_157383", "image": "TQA/images/question_images/parts_fish_2852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the fin that helps the fish to stabilize and steer while swimming" }, { "from": "gpt", "value": "The answer is Dorsal Fins" } ], "clip_score": 33.375, "loss": 2.049837827682495 }, { "id": "identity_111621", "image": "Super-CLEVR/images/superCLEVR_new_000241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the vehicle have?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.15625, "loss": 1.97252357006073 }, { "id": "identity_130658", "image": "Super-CLEVR/images/superCLEVR_new_000252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have only two wheels?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.0, "loss": 2.415996551513672 }, { "id": "identity_251969", "image": "PMC-VQA/images/PMC7082865_fig3-2324709620912101_138516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the thoracic CT scan findings, what are the possible underlying conditions that could lead to diffuse interstitial infiltrates, septal thickening, ground-glass opacities, and bilateral pleural effusions, and how would you differentiate between them?" }, { "from": "gpt", "value": "The answer is Interstitial Lung Disease, Pulmonary Edema, Infection, Pulmonary Fibrosis" } ], "clip_score": 31.796875, "loss": 1.8558392524719238 }, { "id": "identity_98428", "image": "IconQA/images/82010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more turtles or fish in the image?" }, { "from": "gpt", "value": "The answer is More fish" } ], "clip_score": 27.078125, "loss": 3.378615379333496 }, { "id": "identity_120324", "image": "Super-CLEVR/images/superCLEVR_new_001999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the purple object larger than the green bicycle?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.828125, "loss": 3.332393169403076 }, { "id": "identity_309917", "image": "DocVQA/images/hqvn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of J.-M. Renaud?" }, { "from": "gpt", "value": "The answer is Convener SG Pesticides" } ], "clip_score": 21.25, "loss": 3.5074853897094727 }, { "id": "identity_67602", "image": "TQA/images/abc_question_images/cycle_nitrogen_16718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter represents the process of nitrogen fixation in the diagram?" }, { "from": "gpt", "value": "The answer is K" } ], "clip_score": 27.390625, "loss": 2.679900884628296 }, { "id": "identity_193246", "image": "PMC-VQA/images/PMC7118815_Fig3_144434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any soft tissue abnormalities in the image?" }, { "from": "gpt", "value": "The answer is No obvious soft tissue abnormalities" } ], "clip_score": 27.09375, "loss": 2.904961585998535 }, { "id": "identity_62305", "image": "DocVQA/images/ykdv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material composition of the paper used for this document?" }, { "from": "gpt", "value": "The answer is 25% COTTON FIBER U.S.A" } ], "clip_score": 24.09375, "loss": 2.2723641395568848 }, { "id": "identity_95990", "image": "TabMWP/images/16894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Marie need to buy a sewing kit and a tube of toothpaste?" }, { "from": "gpt", "value": "The answer is $1.95" } ], "clip_score": 29.984375, "loss": 1.880791187286377 }, { "id": "identity_326095", "image": "IconQA/images/101354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Luke is waking up. The clock shows the time. What time is it?\nChoices:\n(A) 8:00 P.M.\n(B) 8:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.84375, "loss": 3.5950305461883545 }, { "id": "identity_296552", "image": "GeoQA+/images/1060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, triangle ABC is inscribed in circle O, where OD is perpendicular to BC at D and angle DOC is 50°. What is the degree measure of angle A?\nChoices:\n(A) 40°\n(B) 45°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.5625, "loss": 2.608447551727295 }, { "id": "identity_74503", "image": "AI2D/images/2188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the process of an adult male emerging from its test?" }, { "from": "gpt", "value": "The answer is Emerged" } ], "clip_score": 26.15625, "loss": 3.591240167617798 }, { "id": "identity_96228", "image": "IconQA/images/17111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are squares but not green?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 31.484375, "loss": 2.4189810752868652 }, { "id": "identity_320838", "image": "VQA-AS/images/14263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is being held?" }, { "from": "gpt", "value": "The answer is frisbee" } ], "clip_score": 29.25, "loss": 1.8518526554107666 }, { "id": "identity_24765", "image": "A-OKVQA/images/357572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the man holding a large sign?\nChoices:\n(A) to protest\n(B) to paint\n(C) to celebrate\n(D) to advertise" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.234375, "loss": 3.5382285118103027 }, { "id": "identity_58737", "image": "DocVQA/images/nxmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the employee name mentioned in the document?" }, { "from": "gpt", "value": "The answer is GEORGE R MORRISON" } ], "clip_score": 23.5625, "loss": 2.5661699771881104 }, { "id": "identity_266515", "image": "TabMWP/images/28721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price difference between a chocolate cheesecake and an almond tart?" }, { "from": "gpt", "value": "The answer is 0.66" } ], "clip_score": 31.453125, "loss": 2.613053560256958 }, { "id": "identity_212922", "image": "VizWiz/images/VizWiz_train_00015983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color scheme is used on the CD label?" }, { "from": "gpt", "value": "The answer is Orange and white" } ], "clip_score": 27.640625, "loss": 3.0150811672210693 }, { "id": "identity_155388", "image": "ScienceQA/images/6093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the surface the animal is standing on?" }, { "from": "gpt", "value": "The answer is Rough, likely ground or land" } ], "clip_score": 22.34375, "loss": 4.412092208862305 }, { "id": "identity_34887", "image": "TabMWP/images/921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A vendor who sold bread at farmers' markets counted the number of leftover loaves at the end of each day. According to the table, what was the rate of change between Sunday and Monday?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 37.3125, "loss": 2.3855702877044678 }, { "id": "identity_214211", "image": "VQA2.0/images/26049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the motorcycle's turn signals?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 23.546875, "loss": 3.2666354179382324 }, { "id": "identity_279242", "image": "PMC-VQA/images/PMC6331538_F9_422269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the inset of Figure 2G?\nChoices:\n(A) Labeling on cell membranes. \n(B) Higher magnification image. \n(C) Small cytoplasmic molecules. \n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.359375, "loss": 4.203310966491699 }, { "id": "identity_268230", "image": "VizWiz/images/VizWiz_train_00017351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is buckle" } ], "clip_score": 22.6875, "loss": 3.5695536136627197 }, { "id": "identity_130470", "image": "VQA2.0/images/348790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of day is depicted in the image?" }, { "from": "gpt", "value": "The answer is Either early morning or evening" } ], "clip_score": 19.1875, "loss": 3.942722797393799 }, { "id": "identity_83116", "image": "GeoQA+/images/3409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the volume of the conical chimney cap?" }, { "from": "gpt", "value": "The answer is 16000π cm^3" } ], "clip_score": 33.4375, "loss": 2.334531545639038 }, { "id": "identity_297946", "image": "TabMWP/images/35084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money did Eli spend at the store when he bought 4 pounds of light brown sugar, 4 pounds of powdered sugar, and 3 pounds of granulated sugar?" }, { "from": "gpt", "value": "The answer is 60.68" } ], "clip_score": 33.625, "loss": 2.5780458450317383 }, { "id": "identity_174072", "image": "IconQA/images/58890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does this 3D shape have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.34375, "loss": 4.31964635848999 }, { "id": "identity_18733", "image": "AI2D/images/867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food chain, which organism eats only plants?\nChoices:\n(A) hawk\n(B) scorpion\n(C) coyote\n(D) grasshoppers and rabbits" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.421875, "loss": 3.2389674186706543 }, { "id": "identity_225172", "image": "FigureQA/images/3806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage between the largest and smallest segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 32.25, "loss": 1.743796706199646 }, { "id": "identity_221544", "image": "VQA2.0/images/233644.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of collar is the man's shirt featuring?" }, { "from": "gpt", "value": "The answer is Spread collar" } ], "clip_score": 23.25, "loss": 2.9144256114959717 }, { "id": "identity_272821", "image": "IconQA/images/5498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the shape open or closed? \nOptions:\n(A) closed\n(B) open" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.671875, "loss": 4.6234846115112305 }, { "id": "identity_295580", "image": "TabMWP/images/20010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each bucket contains 5 yellow blocks, how many yellow blocks are there in a total of 3 buckets?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 30.046875, "loss": 1.7601653337478638 }, { "id": "identity_6095", "image": "VQA2.0/images/124609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are in red?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 14.375, "loss": 2.3694891929626465 }, { "id": "identity_145732", "image": "DocVQA/images/jsnh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number for the Denver, Colorado area office?" }, { "from": "gpt", "value": "The answer is 303/893-4600" } ], "clip_score": 16.984375, "loss": 1.8702924251556396 }, { "id": "identity_130813", "image": "VQA-AS/images/7135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the rug on the floor?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.390625, "loss": 3.027155637741089 }, { "id": "identity_3572", "image": "AI2D/images/3663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The largest artery in the human body is:\nChoices:\n(A) Superior vena cava\n(B) Left pulmonary artery\n(C) Aorta\n(D) Right pulmonary artery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.15625, "loss": 3.3894875049591064 }, { "id": "identity_227494", "image": "ScienceQA/images/20037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key morphological features of Livyatan melvillei that distinguish it from other prehistoric whales, and how do these features relate to its predatory behavior?" }, { "from": "gpt", "value": "The answer is Robust skull, large teeth, strong jaws, large size, apex predator" } ], "clip_score": 31.375, "loss": 2.9792816638946533 }, { "id": "identity_333456", "image": "TextVQA/images/eb5c0a477048e7d7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in quotation marks?" }, { "from": "gpt", "value": "The answer is \"el eden\"" } ], "clip_score": 14.8828125, "loss": 3.125840425491333 }, { "id": "identity_329380", "image": "ScienceQA/images/372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Augusta\n(B) New York City\n(C) Manchester\n(D) Concord" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.9375, "loss": 3.6642284393310547 }, { "id": "identity_317785", "image": "TabMWP/images/8266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $715, is there a shortage or something else?\nChoices:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 3.6990151405334473 }, { "id": "identity_70243", "image": "AI2D/images/4561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What characteristic distinguishes Spearmint from the other mint plants in the diagram?" }, { "from": "gpt", "value": "The answer is Its leaves are more elongated and pointed" } ], "clip_score": 33.625, "loss": 2.774350881576538 }, { "id": "identity_224726", "image": "FigureQA/images/3111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Crimson and Brown compared to Dark Orange?" }, { "from": "gpt", "value": "The answer is Equal" } ], "clip_score": 37.28125, "loss": 3.8546817302703857 }, { "id": "identity_217028", "image": "PMC-VQA/images/PMC7532002_fig2_265128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled with \"f\" in the image?" }, { "from": "gpt", "value": "The answer is Absorption of nutrients and minerals from food" } ], "clip_score": 18.28125, "loss": 2.5763072967529297 }, { "id": "identity_125664", "image": "TextVQA/images/81f62dc25199fb80.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the helicopter?" }, { "from": "gpt", "value": "The answer is Dark Gray/Black" } ], "clip_score": 28.203125, "loss": 3.8851420879364014 }, { "id": "identity_67955", "image": "TQA/images/question_images/photosynthesis_7480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What substance is produced by the plant and stored in the roots according to the diagram?" }, { "from": "gpt", "value": "The answer is Excess sugar is stored as starch in the roots" } ], "clip_score": 27.171875, "loss": 1.5922497510910034 }, { "id": "identity_106265", "image": "A-OKVQA/images/100581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the closest person's shirt?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 9.6484375, "loss": 4.342813968658447 }, { "id": "identity_92737", "image": "IconQA/images/9735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many grid squares did the letter Q move horizontally during the transformation?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.65625, "loss": 2.277336835861206 }, { "id": "identity_206978", "image": "A-OKVQA/images/385448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many scooters are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.90625, "loss": 4.254673480987549 }, { "id": "identity_310416", "image": "Geometry3K/images/2065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the length of A B in the given triangle.\nChoices:\n(A) 1.5\n(B) 7\n(C) 11\n(D) 12.5" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.21875, "loss": 3.0528271198272705 }, { "id": "identity_242327", "image": "DocVQA/images/rqhl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount mentioned in the letter if the check amount is doubled and then increased by $5.00?" }, { "from": "gpt", "value": "The answer is $25.20" } ], "clip_score": 19.96875, "loss": 2.2396302223205566 }, { "id": "identity_295697", "image": "TQA/images/question_images/earth_moon_phases_1256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When the moon is directly behind the earth as observed from the earth, which phase of the moon occurs? \n(A) 2nd quarter\n(B) full moon\n(C) 1st quarter\n(D) new moon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.125, "loss": 4.312221050262451 }, { "id": "identity_99352", "image": "IconQA/images/84298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of edges present in all the shapes combined?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 20.1875, "loss": 2.4861371517181396 }, { "id": "identity_297686", "image": "PMC-VQA/images/PMC4663278_F1_448653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to uncover hypermetabolism of the right anterior mesial temporal lobe?\nChoices:\n(A) Magnetic resonance imaging (MRI)\n(B) Cerebral fluor-18-deoxyglucose-positron emission tomography/computed tomography (FDG-PET/-CT)\n(C) Computed tomography (CT)\n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 3.679981231689453 }, { "id": "identity_98796", "image": "TabMWP/images/30864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of e-mails sent?" }, { "from": "gpt", "value": "The answer is 36" } ], "clip_score": 31.46875, "loss": 2.5601985454559326 }, { "id": "identity_36307", "image": "VQA-AS/images/3729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of ball is by the log?" }, { "from": "gpt", "value": "The answer is soccer" } ], "clip_score": 24.359375, "loss": 2.5675132274627686 }, { "id": "identity_66586", "image": "TQA/images/question_images/erosion_6862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the feature that is completely submerged during high tide?" }, { "from": "gpt", "value": "The answer is stump covered at high tide" } ], "clip_score": 27.21875, "loss": 2.0287163257598877 }, { "id": "identity_300759", "image": "AI2D/images/4210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Looking at the diagram above, which step is showing magma?\nChoices:\n(A) D\n(B) A\n(C) I\n(D) C" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.109375, "loss": 3.7938904762268066 }, { "id": "identity_11166", "image": "A-OKVQA/images/64961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The shade held by the teal umbrella pole was crafted in which manner?\nChoices:\n(A) carving\n(B) sewing machine\n(C) weaving\n(D) axe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.40625, "loss": 4.204170227050781 }, { "id": "identity_92669", "image": "IconQA/images/73148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of teal to purple circles?" }, { "from": "gpt", "value": "The answer is 2:5" } ], "clip_score": 36.5625, "loss": 1.6403698921203613 }, { "id": "identity_169009", "image": "IconQA/images/5674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are there in total?" }, { "from": "gpt", "value": "The answer is There are 10 images in total" } ], "clip_score": 23.15625, "loss": 2.0101470947265625 }, { "id": "identity_82183", "image": "UniGeo/images/1524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AD is parallel to BE and CF, and lines l1 and l2 intersect these parallel lines at points A, B, C, and D, E, F respectively, and the length of AB is 1 unit, BC is 2 units, and DE is 1.5 units, what is the total length of AD, BE, and CF combined?" }, { "from": "gpt", "value": "The answer is 6 units" } ], "clip_score": 32.28125, "loss": 1.9047349691390991 }, { "id": "identity_120893", "image": "TextVQA/images/5e3434e835b3fb35.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the top left image represent?" }, { "from": "gpt", "value": "The answer is Night" } ], "clip_score": 18.21875, "loss": 4.1334123611450195 }, { "id": "identity_19073", "image": "GeoQA+/images/5511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,∠C=35°,则∠OBA的度数为()\nChoices:\n(A) 50°\n(B) 60°\n(C) 70°\n(D) 55°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.71875, "loss": 2.55147123336792 }, { "id": "identity_13356", "image": "FigureQA/images/56770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.265625, "loss": 4.020548343658447 }, { "id": "identity_304563", "image": "TabMWP/images/22193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy 9 of these?" }, { "from": "gpt", "value": "The answer is 8.46" } ], "clip_score": 21.421875, "loss": 1.9888360500335693 }, { "id": "identity_147200", "image": "MapQA/images/map_7640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does any state have a value exactly equal to 40?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 20.53125, "loss": 2.9203011989593506 }, { "id": "identity_141056", "image": "DVQA/images/bar_train_00011608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value difference between the element represented by stars and the element represented by diagonal lines in the category \"supply\"?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 21.5625, "loss": 1.9527126550674438 }, { "id": "identity_194318", "image": "TextVQA/images/015329f53f79d0fc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the television network on the outfield wall?" }, { "from": "gpt", "value": "The answer is FSN Bay Area" } ], "clip_score": 29.0, "loss": 2.2487850189208984 }, { "id": "identity_304359", "image": "VizWiz/images/VizWiz_train_00013036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How full is this container?" }, { "from": "gpt", "value": "The answer is 2 3" } ], "clip_score": 19.71875, "loss": 2.836264133453369 }, { "id": "identity_100992", "image": "IconQA/images/10356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more yellow clothespins or orange clothespins?" }, { "from": "gpt", "value": "The answer is More yellow clothespins" } ], "clip_score": 28.40625, "loss": 2.1631059646606445 }, { "id": "identity_7024", "image": "UniGeo/images/3354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB为⊙O的直径,C、D为⊙O上两点,∠ABC=60°,则∠D的度数为()\nChoices:\n(A) 60°\n(B) 30°\n(C) 45°\n(D) 75°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.71875, "loss": 2.808032274246216 }, { "id": "identity_113672", "image": "PMC-VQA/images/PMC4877344_Fig5_33008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which image in the top row corresponds to the b0 value?" }, { "from": "gpt", "value": "The answer is The first image on the left in the top row" } ], "clip_score": 25.234375, "loss": 2.200467824935913 }, { "id": "identity_217828", "image": "TextVQA/images/7e12d937e6e90065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the stove?" }, { "from": "gpt", "value": "The answer is Frigidaire" } ], "clip_score": 28.203125, "loss": 2.1607162952423096 }, { "id": "identity_168975", "image": "IconQA/images/73664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this shape have any right angles?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.546875, "loss": 3.0061116218566895 }, { "id": "identity_170265", "image": "IconQA/images/71974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shapes are circles?" }, { "from": "gpt", "value": "The answer is 1/6" } ], "clip_score": 25.03125, "loss": 1.5552818775177002 }, { "id": "identity_175815", "image": "IconQA/images/83372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern of the sequence?" }, { "from": "gpt", "value": "The answer is Rocket, Moon, Rocket, Star" } ], "clip_score": 28.59375, "loss": 2.0551681518554688 }, { "id": "identity_320689", "image": "FigureQA/images/79099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Indigo the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.25, "loss": 3.8868699073791504 }, { "id": "identity_103451", "image": "VQA-RAD/images/synpic30273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the kidneys be identified in this image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.140625, "loss": 2.986531972885132 }, { "id": "identity_71944", "image": "TQA/images/question_images/human_system_digestive_3694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first part of the digestive system where food enters?" }, { "from": "gpt", "value": "The answer is Pharynx" } ], "clip_score": 30.578125, "loss": 2.716691255569458 }, { "id": "identity_66126", "image": "TQA/images/abc_question_images/optics_lense_types_19161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of image is formed at point Y by the convex lens?" }, { "from": "gpt", "value": "The answer is Real and inverted image" } ], "clip_score": 32.125, "loss": 2.685537099838257 }, { "id": "identity_285714", "image": "VQA2.0/images/524559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where can the judges for this sport be found?" }, { "from": "gpt", "value": "The answer is along blue wall and on ladder" } ], "clip_score": 27.125, "loss": 3.7867279052734375 }, { "id": "identity_50517", "image": "DVQA/images/bar_train_00123601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between \"flow\" and \"dive\" for the label \"wine\"?" }, { "from": "gpt", "value": "The answer is -1" } ], "clip_score": 25.1875, "loss": 2.7797539234161377 }, { "id": "identity_265632", "image": "DVQA/images/bar_train_00094869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many datasets do algorithms have an accuracy greater than 9?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 36.0625, "loss": 3.1798202991485596 }, { "id": "identity_71455", "image": "ScienceQA/images/5560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is at the top of the food chain in this ecosystem?" }, { "from": "gpt", "value": "The answer is Grizzly bear" } ], "clip_score": 30.109375, "loss": 1.9034199714660645 }, { "id": "identity_66081", "image": "ScienceQA/images/1945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance between the centers of the two magnets in Pair 1?" }, { "from": "gpt", "value": "The answer is 1.5 inches" } ], "clip_score": 27.75, "loss": 1.3349642753601074 }, { "id": "identity_257916", "image": "VQA2.0/images/299836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are on the stand where the basket is located, and what are they?" }, { "from": "gpt", "value": "The answer is Two items: a basket and a plant" } ], "clip_score": 14.40625, "loss": 2.8357632160186768 }, { "id": "identity_31165", "image": "A-OKVQA/images/477919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type environment are these fruits grown in?\nChoices:\n(A) tropical\n(B) desert\n(C) tundra\n(D) temperate" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.75, "loss": 4.191421985626221 }, { "id": "identity_324534", "image": "IconQA/images/100387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there enough flowers for each butterfly?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.0, "loss": 3.491986036300659 }, { "id": "identity_103113", "image": "CLEVR-Math/images/CLEVR_train_020190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.265625, "loss": 2.495211601257324 }, { "id": "identity_52293", "image": "FigureQA/images/81260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Cyan line ever surpass the Purple line?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.171875, "loss": 2.9950039386749268 }, { "id": "identity_314674", "image": "GeoQA+/images/695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB为⊙O的直径,C为⊙O上一点,弦AD平分∠BAC,交弦BC于点E,CD=4,DE=2,则AE的长为多少?\n选项:\n(A) 2\n(B) 4\n(C) 6\n(D) 8" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0625, "loss": 2.4680559635162354 }, { "id": "identity_57426", "image": "DocVQA/images/jldg0227_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cost associated with the Field Director (Thai) in Phase IV?" }, { "from": "gpt", "value": "The answer is $6,000" } ], "clip_score": 20.765625, "loss": 1.8063642978668213 }, { "id": "identity_282290", "image": "DocVQA/images/nnng0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which value is most frequently mentioned in the transmission report?" }, { "from": "gpt", "value": "The answer is ec" } ], "clip_score": 25.203125, "loss": 5.826906681060791 }, { "id": "identity_180658", "image": "TabMWP/images/31981.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stops does the train make before reaching Magician Village?" }, { "from": "gpt", "value": "The answer is 3 stops" } ], "clip_score": 24.46875, "loss": 2.549649477005005 }, { "id": "identity_226491", "image": "Geometry3K/images/348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the perimeter of \\(\\triangle ABC\\) if \\(\\triangle ABC\\) is an isosceles triangle with \\( AB \\cong BC \\)" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 36.65625, "loss": 3.2502970695495605 }, { "id": "identity_271763", "image": "GeoQA+/images/5538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In a right-angled triangle as shown (with ∠F = ∠ACB = 90°, ∠E = 45°, ∠A = 60°), if point C lies on the extension of FD, point B lies on DE, and AB is parallel to CF, what is the degree measure of ∠DBC?\nChoices:\n(A) 10°\n(B) 15°\n(C) 18°\n(D) 30°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.171875, "loss": 2.5632123947143555 }, { "id": "identity_305974", "image": "PMC-VQA/images/PMC3732620_fig2_222368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vertebrae are fused together in this image?\nChoices:\n(A) L2 and L3 \n(B) L3 and L4 \n(C) L1 and L2 \n(D) L5 and S1" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.375, "loss": 3.741656541824341 }, { "id": "identity_24247", "image": "DocVQA/images/pkmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the employee name given in the document?" }, { "from": "gpt", "value": "The answer is FRANCES ELLEN BROCK" } ], "clip_score": 23.265625, "loss": 3.292647123336792 }, { "id": "identity_330772", "image": "PMC-VQA/images/PMC6494259_F1_473982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does this appear on T1WI?\nChoices:\n(A) hypointense\n(B) hyperintense\n(C) isointense\n(D) none of the above." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.84375, "loss": 3.5835976600646973 }, { "id": "identity_115346", "image": "VQA2.0/images/316363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the kites?" }, { "from": "gpt", "value": "The answer is Multicolored, no single predominant color" } ], "clip_score": 26.953125, "loss": 2.6674935817718506 }, { "id": "identity_255342", "image": "TabMWP/images/7307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of the crystal vase, rug, and chandelier, and how much more does this total cost than the crystal sculpture?" }, { "from": "gpt", "value": "The answer is 604" } ], "clip_score": 32.0625, "loss": 3.682223320007324 }, { "id": "identity_192356", "image": "TextVQA/images/f23e925bb9700895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the genre of the book based on the cover art?" }, { "from": "gpt", "value": "The answer is Thriller or Crime genre" } ], "clip_score": 22.671875, "loss": 3.406385898590088 }, { "id": "identity_324769", "image": "A-OKVQA/images/552446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color shirt is worn by the person enforcing the rules of the game?\nChoices:\n(A) grey\n(B) white\n(C) red\n(D) black" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.01171875, "loss": 3.876039981842041 }, { "id": "identity_334452", "image": "TextVQA/images/4349b706639fab2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time zone is shown on the far left?" }, { "from": "gpt", "value": "The answer is portland" } ], "clip_score": 19.71875, "loss": 4.145331382751465 }, { "id": "identity_196897", "image": "PMC-VQA/images/PMC4101852_F1_306828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the structure labeled with number 2 in the image?" }, { "from": "gpt", "value": "The answer is Iliac bones" } ], "clip_score": 28.59375, "loss": 3.599795341491699 }, { "id": "identity_162869", "image": "GeoQA+/images/154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the right-angled triangle ABC?" }, { "from": "gpt", "value": "The answer is 7.5 square meters" } ], "clip_score": 31.71875, "loss": 1.8193273544311523 }, { "id": "identity_159127", "image": "GeoQA+/images/2721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ∠OAB in rhombus OABC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.5, "loss": 1.8944103717803955 }, { "id": "identity_327892", "image": "PMC-VQA/images/PMC3504249_fig2_167977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the gaseous necrosis located in the CT scan?\nChoices:\n(A) Along the vertebral column and spinal cord.\n(B) Around the lungs and thorax.\n(C) Along the iliopsoas and anterior abdominal wall.\n(D) Around the heart and mediastinum." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.828125, "loss": 3.857207775115967 }, { "id": "identity_151585", "image": "AI2D/images/3098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled 10, and what is its primary function?" }, { "from": "gpt", "value": "The answer is Cell membrane, controls substance movement" } ], "clip_score": 25.84375, "loss": 4.272494792938232 }, { "id": "identity_225930", "image": "DVQA/images/bar_train_00138904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithms for the datasets wealth, claim, and fellow across all categories?" }, { "from": "gpt", "value": "The answer is approximately 5.44" } ], "clip_score": 36.6875, "loss": 2.6550209522247314 }, { "id": "identity_155094", "image": "ScienceQA/images/5682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the object made of?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 28.09375, "loss": 2.7913978099823 }, { "id": "identity_183700", "image": "VQA2.0/images/52606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the van parked on the street?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 16.515625, "loss": 2.697781562805176 }, { "id": "identity_242601", "image": "A-OKVQA/images/49119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary differences between a freighter and a sailboat in terms of their design and purpose?" }, { "from": "gpt", "value": "The answer is Freighter is for cargo; sailboat is for recreation" } ], "clip_score": 19.71875, "loss": 2.6679482460021973 }, { "id": "identity_86238", "image": "UniGeo/images/1664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If BC = 8 and EF = 1/3 DF, what is the ratio of the area of triangle DEF to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 1/4" } ], "clip_score": 32.6875, "loss": 1.6862738132476807 }, { "id": "identity_141546", "image": "MapQA/images/map_4927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the population of Florida greater than 3 million?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.03125, "loss": 2.9427895545959473 }, { "id": "identity_292321", "image": "Geometry3K/images/414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that CE = t - 2, EB = t + 1, CD = 2, and CA = 10, what is the value of t?\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.203125, "loss": 2.6442034244537354 }, { "id": "identity_227342", "image": "TabMWP/images/33106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more expensive is the bat signed by a famous baseball player compared to the combined cost of the photograph signed by a famous movie star and the book signed by a famous author?" }, { "from": "gpt", "value": "The answer is 1,640" } ], "clip_score": 27.6875, "loss": 2.466378688812256 }, { "id": "identity_83745", "image": "GeoQA+/images/332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BOC in the given circle?" }, { "from": "gpt", "value": "The answer is 78 degrees" } ], "clip_score": 33.8125, "loss": 2.6028482913970947 }, { "id": "identity_56843", "image": "DVQA/images/bar_train_00050509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the category \"access\" have a value greater than 5 in any of the periods?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.5, "loss": 3.0166287422180176 }, { "id": "identity_168221", "image": "TabMWP/images/902.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a randomly selected hot air balloon is yellow?" }, { "from": "gpt", "value": "The answer is 10/19" } ], "clip_score": 19.734375, "loss": 2.345698833465576 }, { "id": "identity_263171", "image": "AI2D/images/2156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this diagram, how many stages are there?\nChoices:\n(A) 8\n(B) 5\n(C) 2\n(D) 7" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.4375, "loss": 3.03092885017395 }, { "id": "identity_144960", "image": "DVQA/images/bar_train_00064101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the 'till' dataset for the 'defeat' algorithm?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 34.09375, "loss": 2.633484363555908 }, { "id": "identity_76560", "image": "AI2D/images/2566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many degrees does Earth's orbit tilt relative to the sun?" }, { "from": "gpt", "value": "The answer is 24° (90° - 66° = 24°)" } ], "clip_score": 26.9375, "loss": 1.46346914768219 }, { "id": "identity_276125", "image": "PMC-VQA/images/PMC2917065_fig01_70861.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of images displayed in the figure? Is it:\n(A) CT Scans\n(B) MRI\n(C) X-Rays\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.015625, "loss": 4.503623008728027 }, { "id": "identity_272716", "image": "DVQA/images/bar_train_00159135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars solid colors without any patterns on them?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 25.484375, "loss": 3.62638521194458 }, { "id": "identity_139954", "image": "DocVQA/images/qpjb0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason for the award mentioned on the certificate?" }, { "from": "gpt", "value": "The answer is In Recognition of Distinguished Service to Public Health" } ], "clip_score": 27.78125, "loss": 1.3260446786880493 }, { "id": "identity_226106", "image": "ScienceQA/images/10682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted on the map and name its capital. Then, list two major cities in this state other than its capital" }, { "from": "gpt", "value": "The answer is Maryland, Annapolis, Baltimore, Frederick" } ], "clip_score": 27.3125, "loss": 2.919562339782715 }, { "id": "identity_275855", "image": "ScienceQA/images/9822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What state of matter does an arrowhead belong to: solid, liquid, or gas?\nChoices:\n(A) a liquid\n(B) a gas\n(C) a solid" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.59375, "loss": 3.8056397438049316 }, { "id": "identity_116265", "image": "PMC-VQA/images/PMC8155169_Fig4_452794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Mediolateral oblique (MLO) view" } ], "clip_score": 28.828125, "loss": 3.3023312091827393 }, { "id": "identity_285036", "image": "TabMWP/images/5416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals scored between 26 and 43 points on the quiz show that Jared researched for his math assignment?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.390625, "loss": 2.2771410942077637 }, { "id": "identity_15387", "image": "FigureQA/images/32417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold less than Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.7578125, "loss": 3.636946201324463 }, { "id": "identity_28599", "image": "Super-CLEVR/images/superCLEVR_new_000192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the school bus that is the same material as the tiny blue object?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 21.015625, "loss": 3.864475727081299 }, { "id": "identity_14376", "image": "AI2D/images/2196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the life cycle phase at point B is egg, what life cycle phase is at point D?\nChoices:\n(A) Egg\n(B) Adult stage\n(C) Larval stage\n(D) Pupal stage" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.765625, "loss": 3.4386539459228516 }, { "id": "identity_127385", "image": "Super-CLEVR/images/superCLEVR_new_001132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object that is in front of the large rubber car and behind the blue motorcycle?" }, { "from": "gpt", "value": "The answer is Rectangular" } ], "clip_score": 24.953125, "loss": 3.3848161697387695 }, { "id": "identity_200851", "image": "TextVQA/images/fe858788a06ba30b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the cheerleaders' uniforms?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 30.84375, "loss": 3.925177812576294 }, { "id": "identity_263706", "image": "IconQA/images/94730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock in Samuel's car show as he is returning home from work in the evening?\nChoices:\n(A) 6:00 P.M.\n(B) 6:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.9375, "loss": 3.708061933517456 }, { "id": "identity_58885", "image": "DVQA/images/bar_train_00157624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stores have sold more 'fear' items than 'past' items?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.28125, "loss": 2.4295742511749268 }, { "id": "identity_97632", "image": "IconQA/images/18505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Meg started gardening when the clock showed 3:00 PM, how long has she been gardening by 4:00 PM?" }, { "from": "gpt", "value": "The answer is 1 hour" } ], "clip_score": 27.953125, "loss": 1.7941793203353882 }, { "id": "identity_234128", "image": "ScienceQA/images/19814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the ratio of the magnetic forces between Pair 1 and Pair 2, given that the magnetic force between two magnets is inversely proportional to the square of the distance between them" }, { "from": "gpt", "value": "The answer is \\( \\frac{25}{9} \\)" } ], "clip_score": 19.796875, "loss": 2.5547866821289062 }, { "id": "identity_177168", "image": "IconQA/images/99513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the spinner is colored blue?" }, { "from": "gpt", "value": "The answer is 2/3" } ], "clip_score": 30.296875, "loss": 1.473197340965271 }, { "id": "identity_160649", "image": "GeoQA+/images/5556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of angles 1, 2, and 3 in the given image of the combination of six equal-sided squares?" }, { "from": "gpt", "value": "The answer is 270 degrees" } ], "clip_score": 33.25, "loss": 1.921854019165039 }, { "id": "identity_181974", "image": "A-OKVQA/images/33215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.078125, "loss": 3.110478401184082 }, { "id": "identity_130782", "image": "A-OKVQA/images/235466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible brooms are there in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.953125, "loss": 4.241278648376465 }, { "id": "identity_258420", "image": "A-OKVQA/images/92804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the activities of all the skiers in the image, and how do their actions relate to each other?" }, { "from": "gpt", "value": "The answer is The leftmost skier is seeking shelter; others are skiing downhill" } ], "clip_score": 23.71875, "loss": 3.4579854011535645 }, { "id": "identity_7807", "image": "ChartQA/images/multi_col_40141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color does Never heard of represent?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 18.734375, "loss": 3.176553964614868 }, { "id": "identity_63287", "image": "ScienceQA/images/3740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the \"rainbow sucker\" primarily composed of?" }, { "from": "gpt", "value": "The answer is Circle" } ], "clip_score": 9.8671875, "loss": 2.913642406463623 }, { "id": "identity_12615", "image": "TQA/images/question_images/stratigraphy_9265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the oldest layer in the diagram?\nChoices:\n(A) L\n(B) E\n(C) A\n(D) G" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.765625, "loss": 4.290520668029785 }, { "id": "identity_33757", "image": "DocVQA/images/qhpm0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: mention the page number at bottom right corner of the page ?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 16.234375, "loss": 2.6044065952301025 }, { "id": "identity_3342", "image": "VQA2.0/images/119199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it a cloudy or sunny day?" }, { "from": "gpt", "value": "The answer is cloudy" } ], "clip_score": 16.265625, "loss": 2.296966552734375 }, { "id": "identity_116309", "image": "TextVQA/images/230c50f5e12533a8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the surface the keyboard is placed on?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 23.46875, "loss": 3.0952208042144775 }, { "id": "identity_186774", "image": "TextVQA/images/23d2ec2ae5b21d23.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which author's name appears most frequently on the book covers?" }, { "from": "gpt", "value": "The answer is David Brin" } ], "clip_score": 30.03125, "loss": 1.9099944829940796 }, { "id": "identity_262300", "image": "ScienceQA/images/18220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Hawaii?\nChoices:\n(A) Hilo\n(B) Portland\n(C) Phoenix\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.1875, "loss": 3.841933488845825 }, { "id": "identity_39380", "image": "DocVQA/images/gqlp0000_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the distributor mentioned in the document?" }, { "from": "gpt", "value": "The answer is Sosnick - Fresno" } ], "clip_score": 27.421875, "loss": 4.578822135925293 }, { "id": "identity_245121", "image": "PMC-VQA/images/PMC5411224_F2_158445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key radiographic features that indicate the patient underwent Total Hip Arthroplasty, and how do these features differentiate it from the other surgical procedures listed?" }, { "from": "gpt", "value": "The answer is Presence of femoral stem and acetabular cup in hip joint" } ], "clip_score": 31.65625, "loss": 2.5504610538482666 }, { "id": "identity_93074", "image": "IconQA/images/65127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many grid squares did the letter Q move vertically during the transformation?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 33.03125, "loss": 2.620535135269165 }, { "id": "identity_12121", "image": "TextVQA/images/353e68e3e4336688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is the sign saying this spot is reserved for members or nonmembers?" }, { "from": "gpt", "value": "The answer is members" } ], "clip_score": 30.890625, "loss": 3.2044758796691895 }, { "id": "identity_154546", "image": "ScienceQA/images/6158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the magnets in both pairs?" }, { "from": "gpt", "value": "The answer is Blue and Green" } ], "clip_score": 32.1875, "loss": 2.682528257369995 }, { "id": "identity_79638", "image": "UniGeo/images/1017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of the circle is r, what is the length of segment OC?" }, { "from": "gpt", "value": "The answer is r" } ], "clip_score": 33.625, "loss": 3.0394539833068848 }, { "id": "identity_285257", "image": "ScienceQA/images/12985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Iowa?\nChoices:\n(A) Cedar Rapids\n(B) Indianapolis\n(C) Lincoln\n(D) Des Moines" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.609375, "loss": 3.7258248329162598 }, { "id": "identity_139208", "image": "DocVQA/images/srpf0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many specific types of expenses are listed as not covered under the TIIA Major Medical insurance?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.90625, "loss": 2.4373817443847656 }, { "id": "identity_188241", "image": "PMC-VQA/images/PMC6025296_jcm-07-00116-f001_335403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the organ located to the left of the tumor as indicated by the arrow?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 30.484375, "loss": 4.173713684082031 }, { "id": "identity_167975", "image": "TabMWP/images/35434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two drawers are removed, how many coats will be left?" }, { "from": "gpt", "value": "The answer is 4 coats" } ], "clip_score": 28.671875, "loss": 1.9083757400512695 }, { "id": "identity_58064", "image": "DocVQA/images/yjhl0226_67.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organization submitted the plan?" }, { "from": "gpt", "value": "The answer is The Planning Committee to the Policy Committee" } ], "clip_score": 22.265625, "loss": 2.1393983364105225 }, { "id": "identity_253164", "image": "TQA/images/question_images/ocean_waves_7125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the point on a wave with the maximum value or upward displacement within a cycle and explain the role of gravity in the formation of this point" }, { "from": "gpt", "value": "The answer is Crest; gravity pulls water downward, forming crests and troughs" } ], "clip_score": 32.15625, "loss": 2.5688700675964355 }, { "id": "identity_202358", "image": "A-OKVQA/images/116694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment are the people queuing for?" }, { "from": "gpt", "value": "The answer is Within ten words" } ], "clip_score": 22.765625, "loss": 5.007687568664551 }, { "id": "identity_129327", "image": "PMC-VQA/images/PMC4939254_F3_50193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the \"LA\" label indicate in this echocardiogram?" }, { "from": "gpt", "value": "The answer is Left Atrium" } ], "clip_score": 31.578125, "loss": 3.339080572128296 }, { "id": "identity_272196", "image": "ScienceQA/images/13091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Illinois?\nChoices:\n(A) Chicago\n(B) Topeka\n(C) Springfield\n(D) Nashville" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.25, "loss": 4.035924911499023 }, { "id": "identity_236349", "image": "MapQA/images/map_4745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the highest number of clothes, and what is the range of the number of clothes for that state?" }, { "from": "gpt", "value": "The answer is Florida, 55058-63778" } ], "clip_score": 37.1875, "loss": 2.9054837226867676 }, { "id": "identity_134960", "image": "DocVQA/images/ypvw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is the strategic publication planning for?" }, { "from": "gpt", "value": "The answer is 2004" } ], "clip_score": 19.53125, "loss": 1.6252447366714478 }, { "id": "identity_232457", "image": "A-OKVQA/images/470442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What season is it likely to be in the image, considering the vehicle is holding snow and the people are dressed in heavy clothing?" }, { "from": "gpt", "value": "The answer is winter" } ], "clip_score": 21.15625, "loss": 3.129208564758301 }, { "id": "identity_60218", "image": "ChartQA/images/multi_col_80127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage increase in the number of warehouse outlets from 2016 to 2020?" }, { "from": "gpt", "value": "The answer is 26.63%" } ], "clip_score": 29.015625, "loss": 1.9334633350372314 }, { "id": "identity_58205", "image": "FigureQA/images/3716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the Dodger Blue bar on the x-axis?" }, { "from": "gpt", "value": "The answer is Approximately 9" } ], "clip_score": 31.703125, "loss": 2.7858688831329346 }, { "id": "identity_160946", "image": "GeoQA+/images/639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在Rt△ABC中,若∠BCD=35°,求∠ACB的度数为()" }, { "from": "gpt", "value": "The answer is 35°" } ], "clip_score": 30.765625, "loss": 1.8786734342575073 }, { "id": "identity_212739", "image": "VQA2.0/images/205243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the sailboat closest to the camera?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.265625, "loss": 2.852294445037842 }, { "id": "identity_52558", "image": "DocVQA/images/kzph0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the Topeka Capital-Journal?" }, { "from": "gpt", "value": "The answer is 616 Jefferson, Topeka, Kansas" } ], "clip_score": 19.875, "loss": 2.1412901878356934 }, { "id": "identity_231871", "image": "AI2D/images/1904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the food web diagram, identify the primary consumer that directly feeds on the producer, and list all the secondary consumers that feed on this primary consumer" }, { "from": "gpt", "value": "The answer is Rabbit; Bear, Arctic Fox, Snowy Owl" } ], "clip_score": 34.28125, "loss": 2.9271085262298584 }, { "id": "identity_234439", "image": "FigureQA/images/17695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Violet Red and Forest Green?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 34.71875, "loss": 1.6396499872207642 }, { "id": "identity_176084", "image": "IconQA/images/1486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many black marbles are there?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.703125, "loss": 2.288111686706543 }, { "id": "identity_42323", "image": "MapQA/images/map_11689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value for the state of California?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 26.15625, "loss": 2.46235728263855 }, { "id": "identity_192369", "image": "PMC-VQA/images/PMC2500027_F1_26401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible abnormality in the brain scan?" }, { "from": "gpt", "value": "The answer is No visible abnormality" } ], "clip_score": 29.515625, "loss": 2.7281503677368164 }, { "id": "identity_272094", "image": "TextVQA/images/b385728633d4f34c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company manufactured the blue box?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 9.234375, "loss": 3.9585213661193848 }, { "id": "identity_86372", "image": "GeoQA+/images/4261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In quadrilateral ABCD with AD parallel to BC, if AE = 5 and the area of triangle AOE is 10, what is the length of segment OE?" }, { "from": "gpt", "value": "The answer is OE is 4 units long" } ], "clip_score": 34.21875, "loss": 2.1454148292541504 }, { "id": "identity_276910", "image": "VQA2.0/images/90307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color is the bike on the right?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 28.234375, "loss": 3.774768352508545 }, { "id": "identity_214067", "image": "VQA2.0/images/223836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles have visible white wall tires?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.484375, "loss": 4.692868709564209 }, { "id": "identity_41051", "image": "FigureQA/images/33265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color in the pie chart?" }, { "from": "gpt", "value": "The answer is Medium Orchid" } ], "clip_score": 34.84375, "loss": 2.326618194580078 }, { "id": "identity_277526", "image": "PMC-VQA/images/PMC3512476_F3_170135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is revealed inside the kidney by the computed tomography scan? \nChoices:\n(A) High-density areas \n(B) Normal tissue \n(C) Low-density areas \n(D) Fluid-filled cysts" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.25, "loss": 4.06677770614624 }, { "id": "identity_231268", "image": "VQA-AS/images/15765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual clues that indicate the girl is waiting for someone?" }, { "from": "gpt", "value": "The answer is body language, environment, expression" } ], "clip_score": 23.5625, "loss": 3.7522575855255127 }, { "id": "identity_111571", "image": "VQA2.0/images/497005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the sky in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 14.6484375, "loss": 3.3409206867218018 }, { "id": "identity_205775", "image": "A-OKVQA/images/271870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of food is advertised on the first food truck?" }, { "from": "gpt", "value": "The answer is Sausages" } ], "clip_score": 25.734375, "loss": 3.041680097579956 }, { "id": "identity_41695", "image": "DVQA/images/bar_train_00080776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value for the category 'space'?" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 24.515625, "loss": 2.173015594482422 }, { "id": "identity_120186", "image": "VQA2.0/images/470691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any duplicate spice jars on the shelves?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.46875, "loss": 2.9430315494537354 }, { "id": "identity_3152", "image": "A-OKVQA/images/528142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of transportation is being used?\nChoices:\n(A) air\n(B) rail\n(C) road\n(D) water" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.28125, "loss": 4.281509876251221 }, { "id": "identity_20962", "image": "CLEVR-Math/images/CLEVR_train_001641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all cubes. How many objects are left?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.515625, "loss": 2.0329411029815674 }, { "id": "identity_50105", "image": "DVQA/images/bar_train_00148185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does 'driver' have the highest combined value of 'seat' and 'grave'?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.703125, "loss": 3.1574490070343018 }, { "id": "identity_3161", "image": "PMC-VQA/images/PMC6967279_Fig1_102276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the computed tomography thoracic examination? \nChoices:\n(A) Normal \n(B) Dilated oesophagus \n(C) Enlarged lungs \n(D) Collapsed lungs. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.21875, "loss": 4.900376796722412 }, { "id": "identity_254829", "image": "PMC-VQA/images/PMC6129593_F12_365251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific subregion of the hippocampus is being shown in the image, and what is its primary function related to memory?" }, { "from": "gpt", "value": "The answer is CA1, consolidation of long-term memory and spatial navigation" } ], "clip_score": 23.984375, "loss": 2.629770517349243 }, { "id": "identity_160778", "image": "Geometry3K/images/485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the circle?" }, { "from": "gpt", "value": "The answer is 49π" } ], "clip_score": 29.171875, "loss": 1.7111437320709229 }, { "id": "identity_281918", "image": "TextVQA/images/84907cf5f16cf785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the location of this advertisement?" }, { "from": "gpt", "value": "The answer is boston, mass" } ], "clip_score": 22.171875, "loss": 2.6569900512695312 }, { "id": "identity_86675", "image": "GeoQA+/images/2365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the oil reaches a maximum depth of h cm at the center of the barrel, what is the length of the chord CD at this depth?" }, { "from": "gpt", "value": "The answer is 2 * sqrt(50^2 - h^2) cm" } ], "clip_score": 32.0625, "loss": 1.656553030014038 }, { "id": "identity_19293", "image": "UniGeo/images/1477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,DE∥BC,分别交△ABC的边AB、AC于点D、E,\\frac{AD}{AB}=\\frac{1}{3},若AE=1,则EC=()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 6" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.140625, "loss": 2.688880443572998 }, { "id": "identity_5851", "image": "MapQA/images/map_9285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 12.2890625, "loss": 3.073484420776367 }, { "id": "identity_41106", "image": "DVQA/images/bar_train_00072013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of 'anger' for 'island'?" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 29.078125, "loss": 1.6128324270248413 }, { "id": "identity_206671", "image": "VQA2.0/images/563713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of figures are depicted on the vase?" }, { "from": "gpt", "value": "The answer is Human figures" } ], "clip_score": 28.609375, "loss": 3.3475563526153564 }, { "id": "identity_141417", "image": "DVQA/images/bar_train_00037917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold by 'opera' store across all categories?" }, { "from": "gpt", "value": "The answer is 3.67" } ], "clip_score": 31.09375, "loss": 1.9650743007659912 }, { "id": "identity_251389", "image": "TextVQA/images/43d6d06a43bdd387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the brands of all the water bottles on the counter?" }, { "from": "gpt", "value": "The answer is Dasani and Aquafina" } ], "clip_score": 22.828125, "loss": 1.6483440399169922 }, { "id": "identity_142842", "image": "ChartQA/images/8314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the favoring rate for more wind turbine farms exceed the opposition rate for more offshore drilling?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.390625, "loss": 3.467557191848755 }, { "id": "identity_166067", "image": "TabMWP/images/30640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of vehicle sales among the dealerships?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 30.4375, "loss": 3.3300421237945557 }, { "id": "identity_186288", "image": "PMC-VQA/images/PMC7927443_f1_383050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of scan is depicted in the image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 29.84375, "loss": 3.2236621379852295 }, { "id": "identity_21234", "image": "UniGeo/images/2156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,∠C=90°,AB=10,AC=6,D、E、F分别是△ABC三边的中点,则△DEF的周长为()\nChoices:\n(A) 24\n(B) 16\n(C) 14\n(D) 12" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.8125, "loss": 2.4615397453308105 }, { "id": "identity_330524", "image": "ChartQA/images/12050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the orange color signify in the graph?" }, { "from": "gpt", "value": "The answer is Taliban" } ], "clip_score": 23.015625, "loss": 2.6916348934173584 }, { "id": "identity_76945", "image": "AI2D/images/82.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many circles of latitude are marked on this diagram?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.546875, "loss": 2.165384531021118 }, { "id": "identity_262327", "image": "TQA/images/abc_question_images/parts_worm_17306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which part of the worm are the dorsal pores located?\nChoices:\n(A) S\n(B) T\n(C) X\n(D) F" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.96875, "loss": 3.920323610305786 }, { "id": "identity_100123", "image": "TabMWP/images/24746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can Darren afford to buy a sewing needle and a small bottle of glue with his $1.23?" }, { "from": "gpt", "value": "The answer is Yes, he can afford them" } ], "clip_score": 23.0, "loss": 2.6067519187927246 }, { "id": "identity_37760", "image": "GeoQA+/images/3594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知在⊙A中,B、C、D三个点在圆上,且满足∠CBD=2∠BDC.若∠BAC=44°,则∠CAD的度数为()\nChoices:\n(A) 68°\n(B) 88°\n(C) 90°\n(D) 112°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.640625, "loss": 2.4584343433380127 }, { "id": "identity_88436", "image": "Geometry3K/images/226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of z in the figure?" }, { "from": "gpt", "value": "The answer is z = 13.2" } ], "clip_score": 26.890625, "loss": 2.055983543395996 }, { "id": "identity_36897", "image": "PlotQA/images/83112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph contain grids ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.625, "loss": 3.641265869140625 }, { "id": "identity_100077", "image": "TabMWP/images/11102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of fish in the tanks?" }, { "from": "gpt", "value": "The answer is 77 or 78 fish (mode is not unique)" } ], "clip_score": 36.25, "loss": 2.9433374404907227 }, { "id": "identity_179770", "image": "IconQA/images/42543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many circles are there in total?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 33.125, "loss": 2.0099899768829346 }, { "id": "identity_266191", "image": "GeoQA+/images/3998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, point C lies on the line segment AB, and point D is the midpoint of line segment BC. If AB = 10 and AC = 6, what is the length of line segment AD?\nChoices:\n(A) 6\n(B) 2\n(C) 8\n(D) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.078125, "loss": 2.4565930366516113 }, { "id": "identity_34744", "image": "DocVQA/images/rrcw0217_20.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fax number?" }, { "from": "gpt", "value": "The answer is 609/924-6648" } ], "clip_score": 20.953125, "loss": 2.108274459838867 }, { "id": "identity_276014", "image": "DVQA/images/bar_train_00163246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the accuracy of the algorithm review in the dataset tray greater than the accuracy of the algorithm chorus in the dataset reach?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 35.5, "loss": 3.497030258178711 }, { "id": "identity_129731", "image": "A-OKVQA/images/66825.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the attire of the person standing closest to the horse sculpture's head?" }, { "from": "gpt", "value": "The answer is Brown jacket and hat" } ], "clip_score": 26.484375, "loss": 2.883270025253296 }, { "id": "identity_281696", "image": "IconQA/images/80172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the quantity of hearts is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.875, "loss": 3.98368763923645 }, { "id": "identity_203046", "image": "VQA2.0/images/442774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any windows in the room?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.546875, "loss": 3.1239752769470215 }, { "id": "identity_186605", "image": "A-OKVQA/images/475580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the registration number of the bus?" }, { "from": "gpt", "value": "The answer is LX55 EPF" } ], "clip_score": 26.640625, "loss": 3.137058734893799 }, { "id": "identity_96089", "image": "TabMWP/images/23799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teams have a size between 30 and 50 swimmers?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.546875, "loss": 2.2593183517456055 }, { "id": "identity_165158", "image": "GeoQA+/images/908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with diameter BD, if angle AOB is 70 degrees, what is the degree measure of angle ADC?" }, { "from": "gpt", "value": "The answer is 35 degrees" } ], "clip_score": 33.75, "loss": 1.9976084232330322 }, { "id": "identity_36986", "image": "A-OKVQA/images/472828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What types of fruits does the vendor here specialize in?\nChoices:\n(A) citrus\n(B) tomatoes\n(C) melons\n(D) pome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.328125, "loss": 3.855844497680664 }, { "id": "identity_158879", "image": "ScienceQA/images/1262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.578125, "loss": 2.496734857559204 }, { "id": "identity_172799", "image": "TabMWP/images/29055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of one horse key chain and two pig key chains?" }, { "from": "gpt", "value": "The answer is $5.34" } ], "clip_score": 34.09375, "loss": 1.9960899353027344 }, { "id": "identity_176403", "image": "IconQA/images/20693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.84375, "loss": 3.222614288330078 }, { "id": "identity_70465", "image": "ScienceQA/images/18354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of aquatic environment is this animal likely adapted to?" }, { "from": "gpt", "value": "The answer is Marine environment" } ], "clip_score": 27.546875, "loss": 3.265735626220703 }, { "id": "identity_148776", "image": "DocVQA/images/mjvg0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year was the Economic Opportunity Act mentioned?" }, { "from": "gpt", "value": "The answer is 1964" } ], "clip_score": 25.890625, "loss": 1.1236718893051147 }, { "id": "identity_29537", "image": "IconQA/images/61055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use paper clips to measure the line. The line is about (_) paper clips long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.25, "loss": 2.392641067504883 }, { "id": "identity_56406", "image": "DVQA/images/bar_train_00047919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest value for the 'joy' group?" }, { "from": "gpt", "value": "The answer is mad" } ], "clip_score": 25.8125, "loss": 4.573418617248535 }, { "id": "identity_171509", "image": "IconQA/images/61513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of purple marbles to the total number of marbles?" }, { "from": "gpt", "value": "The answer is 5:6" } ], "clip_score": 34.5, "loss": 1.4581797122955322 }, { "id": "identity_1490", "image": "VQA-RAD/images/synpic27402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image sufficient for a diagnosis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.296875, "loss": 2.946882724761963 }, { "id": "identity_209477", "image": "A-OKVQA/images/215549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the stadium in the background?" }, { "from": "gpt", "value": "The answer is Citi Field" } ], "clip_score": 21.84375, "loss": 2.0244345664978027 }, { "id": "identity_300999", "image": "ChartQA/images/multi_col_1098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest value of this oil and the lowest value of that oil?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 22.484375, "loss": 3.1355299949645996 }, { "id": "identity_257716", "image": "DocVQA/images/fryc0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of patients who follow either the \"Three meals a day\" or \"Two meals, no lunch\" meal patterns, and are either \"Eligible but do not use\" or \"Not eligible\" for food stamps?" }, { "from": "gpt", "value": "The answer is 138%" } ], "clip_score": 20.15625, "loss": 1.9713586568832397 }, { "id": "identity_51814", "image": "DVQA/images/bar_train_00165210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'taxi' and 'carbon' algorithms?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.59375, "loss": 3.3519704341888428 }, { "id": "identity_25851", "image": "VizWiz/images/VizWiz_train_00002181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Name this button." }, { "from": "gpt", "value": "The answer is velvet sugar" } ], "clip_score": 16.59375, "loss": 2.2212436199188232 }, { "id": "identity_279588", "image": "TextVQA/images/165bdc1b0193a27a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what is written on the street sign?" }, { "from": "gpt", "value": "The answer is boteler st" } ], "clip_score": 11.984375, "loss": 5.278849124908447 }, { "id": "identity_53710", "image": "DVQA/images/bar_train_00195362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the \"press\" algorithm more accurate than the \"driver\" algorithm on the \"column\" dataset?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 35.375, "loss": 2.677027940750122 }, { "id": "identity_268482", "image": "AI2D/images/523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which line has the same beginning and ending letter as D?\nChoices:\n(A) b\n(B) c\n(C) a\n(D) e" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.484375, "loss": 3.630556583404541 }, { "id": "identity_69123", "image": "AI2D/images/1165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part in the image is the site where ribosomal RNA is synthesized?" }, { "from": "gpt", "value": "The answer is Nucleolus" } ], "clip_score": 29.625, "loss": 1.5369075536727905 }, { "id": "identity_196825", "image": "PMC-VQA/images/PMC6221646_F1_392942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures can be identified in the mediastinum?" }, { "from": "gpt", "value": "The answer is Heart, aorta, and trachea" } ], "clip_score": 24.25, "loss": 1.949820876121521 }, { "id": "identity_123352", "image": "TextVQA/images/9a70df112c243ff4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the outfield?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.125, "loss": 4.576926231384277 }, { "id": "identity_140986", "image": "FigureQA/images/25006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Khaki greater than 70?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 16.8125, "loss": 3.2676002979278564 }, { "id": "identity_326242", "image": "IconQA/images/66183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cubes long is this?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.453125, "loss": 1.9133597612380981 }, { "id": "identity_203835", "image": "VQA2.0/images/260359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vegetables in the image that are typically used in making salads?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.28125, "loss": 2.9963393211364746 }, { "id": "identity_131071", "image": "Super-CLEVR/images/superCLEVR_new_001452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a vehicle in the image that is larger than the cyan scooter but smaller than the silver airplane?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.484375, "loss": 3.170480251312256 }, { "id": "identity_338464", "image": "DocVQA/images/skww0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which program allocated $195,000?" }, { "from": "gpt", "value": "The answer is Honoraria" } ], "clip_score": 29.0625, "loss": 4.998473644256592 }, { "id": "identity_95014", "image": "TabMWP/images/6414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pages read by Debbie over the eight months?" }, { "from": "gpt", "value": "The answer is 660" } ], "clip_score": 30.109375, "loss": 2.5248031616210938 }, { "id": "identity_56516", "image": "DVQA/images/bar_train_00143653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy on the 'battle' dataset?" }, { "from": "gpt", "value": "The answer is brass" } ], "clip_score": 34.90625, "loss": 2.406803846359253 }, { "id": "identity_318259", "image": "AI2D/images/4208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram best indicates Magma cooling into Igneous Rock?\nChoices:\n(A) B\n(B) K\n(C) A\n(D) I" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.21875, "loss": 4.446610927581787 }, { "id": "identity_294903", "image": "A-OKVQA/images/133071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is the light green mixture?\n(A) sauerkraut\n(B) salsa verde\n(C) guacamole\n(D) pesto" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.84375, "loss": 4.885773658752441 }, { "id": "identity_4782", "image": "VQA-RAD/images/synpic48749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there ring-enhancement?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 14.1171875, "loss": 3.326166868209839 }, { "id": "identity_204516", "image": "A-OKVQA/images/579739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.9765625, "loss": 3.1124160289764404 }, { "id": "identity_208693", "image": "A-OKVQA/images/273579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cloth tied around the elephant's neck?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.671875, "loss": 3.2503387928009033 }, { "id": "identity_71874", "image": "AI2D/images/4635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the stage called that comes after the larva stage in the life cycle of a stable fly?" }, { "from": "gpt", "value": "The answer is Puparium" } ], "clip_score": 31.71875, "loss": 2.3119735717773438 }, { "id": "identity_31009", "image": "IconQA/images/73369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a plane. Which picture is third?\nChoices:\n(A) bear\n(B) train\n(C) plane" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.53125, "loss": 4.174116611480713 }, { "id": "identity_4610", "image": "AI2D/images/3233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The outermost part of the cell is called _______.\nChoices:\n(A) Cytoplasm\n(B) Ribosome\n(C) Lysosome\n(D) Cell(plasma) membrance" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.734375, "loss": 4.096194267272949 }, { "id": "identity_11497", "image": "DocVQA/images/xnhw0228_29.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Supplies Amount ?" }, { "from": "gpt", "value": "The answer is 58.00" } ], "clip_score": 19.078125, "loss": 2.04547119140625 }, { "id": "identity_150824", "image": "AI2D/images/4626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which rock sample in the image is an example of a volcanic glass?" }, { "from": "gpt", "value": "The answer is Obsidian" } ], "clip_score": 31.0, "loss": 1.746299147605896 }, { "id": "identity_336988", "image": "A-OKVQA/images/362090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will be the first thing they grab when they stand up?\nChoices:\n(A) dry cleaning\n(B) backpack\n(C) jacket\n(D) cane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.9296875, "loss": 4.238125324249268 }, { "id": "identity_131345", "image": "VQA2.0/images/14873.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any bicycles in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.6875, "loss": 3.071017026901245 }, { "id": "identity_255503", "image": "TabMWP/images/14609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average amount Latrell spent on lunch from Tuesday to Saturday, and how much more did he spend on Friday compared to the average?" }, { "from": "gpt", "value": "The answer is $4.80" } ], "clip_score": 40.34375, "loss": 2.452302932739258 }, { "id": "identity_148299", "image": "DocVQA/images/ygwg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount allocated for consumable supplies?" }, { "from": "gpt", "value": "The answer is $5,500" } ], "clip_score": 25.3125, "loss": 2.0615944862365723 }, { "id": "identity_68403", "image": "AI2D/images/4450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which foot type has an arch that is not visible?" }, { "from": "gpt", "value": "The answer is Flat Foot" } ], "clip_score": 30.8125, "loss": 2.352666139602661 }, { "id": "identity_11863", "image": "FigureQA/images/2004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.671875, "loss": 4.726737022399902 }, { "id": "identity_202405", "image": "PMC-VQA/images/PMC5387068_F3_151495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of mesenteric lymphadenopathy?" }, { "from": "gpt", "value": "The answer is No evidence of mesenteric lymphadenopathy in this image" } ], "clip_score": 25.0625, "loss": 1.5141820907592773 }, { "id": "identity_170939", "image": "TabMWP/images/5775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more kilograms of cherries were sold on Saturday compared to Friday?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 37.34375, "loss": 2.3898026943206787 }, { "id": "identity_158924", "image": "AI2D/images/389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of Dragonfly nymph decreases, which organism would be directly affected?" }, { "from": "gpt", "value": "The answer is Fish" } ], "clip_score": 29.09375, "loss": 3.151120662689209 }, { "id": "identity_64865", "image": "TQA/images/question_images/dna_6764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What structure is labeled as the Phosphate backbone?" }, { "from": "gpt", "value": "The answer is The yellow ribbon-like structure" } ], "clip_score": 32.21875, "loss": 3.796464443206787 }, { "id": "identity_80247", "image": "GeoQA+/images/879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, PM and QN are perpendicular bisectors of AB and AC respectively. If angle BAC is 110 degrees, what is the measure of angle QPN?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 32.15625, "loss": 1.7441825866699219 }, { "id": "identity_64967", "image": "AI2D/images/3786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organ is responsible for the digestion and absorption of food?" }, { "from": "gpt", "value": "The answer is Small intestine" } ], "clip_score": 28.375, "loss": 2.242436647415161 }, { "id": "identity_230740", "image": "AI2D/images/2123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different species does the dingoe directly prey on in this food chain diagram?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.78125, "loss": 2.290235757827759 }, { "id": "identity_212329", "image": "TextVQA/images/6598b3b3b68fe255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this watch part of a special collection?" }, { "from": "gpt", "value": "The answer is Yes, it's a Limited Edition" } ], "clip_score": 29.40625, "loss": 2.4317591190338135 }, { "id": "identity_250373", "image": "DVQA/images/bar_train_00130091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer 'get' across all categories?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 29.171875, "loss": 2.341035842895508 }, { "id": "identity_111201", "image": "A-OKVQA/images/429456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats in the image?" }, { "from": "gpt", "value": "The answer is At least four" } ], "clip_score": 26.359375, "loss": 4.062855243682861 }, { "id": "identity_234531", "image": "TabMWP/images/34000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a randomly selected student is either responsible or has 1 sibling? Simplify any fractions" }, { "from": "gpt", "value": "The answer is 14/19" } ], "clip_score": 32.46875, "loss": 2.2293496131896973 }, { "id": "identity_233116", "image": "Geometry3K/images/385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the distance \\( x \\) from point \\( F \\) to point \\( W \\) given that the scale factor from \\( W \\) to \\( W' \\) is 0.6" }, { "from": "gpt", "value": "The answer is 16.67" } ], "clip_score": 29.3125, "loss": 2.1333165168762207 }, { "id": "identity_46245", "image": "FigureQA/images/44168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have a legend that explains the colors?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.84375, "loss": 3.338329315185547 }, { "id": "identity_69030", "image": "ScienceQA/images/8115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which pair of magnets will repel each other?" }, { "from": "gpt", "value": "The answer is Pair 1" } ], "clip_score": 25.375, "loss": 1.7291406393051147 }, { "id": "identity_282148", "image": "CLEVR-Math/images/CLEVR_train_059275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you subtract 2 spheres, how many spheres remain?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 17.8125, "loss": 2.4708797931671143 }, { "id": "identity_6412", "image": "TQA/images/question_images/nuclear_energy_7097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After all the nuclear process, how electric energy is sent to be used?\nChoices:\n(A) Turbines\n(B) Pumps\n(C) Transmission lines\n(D) Steams" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.046875, "loss": 3.7679476737976074 }, { "id": "identity_307954", "image": "ScienceQA/images/9569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can they trade to each get what they want?\nChoices:\n(A) Troy can trade his tomatoes for Johnny's broccoli.\n(B) Johnny can trade his almonds for Troy's tomatoes.\n(C) Johnny can trade his broccoli for Troy's oranges.\n(D) Troy can trade his tomatoes for Johnny's carrots." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 3.292041778564453 }, { "id": "identity_5950", "image": "TQA/images/question_images/seasons_2641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What comes after the winter solstice?\nChoices:\n(A) summer solstice\n(B) autumnal equinox\n(C) vernal equinox\n(D) twilight solstice" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.84375, "loss": 3.949699878692627 }, { "id": "identity_321252", "image": "A-OKVQA/images/555065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is unusual about the name of this boat?\nChoices:\n(A) missing n\n(B) extra l\n(C) extra m\n(D) extra e" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.515625, "loss": 3.8308138847351074 }, { "id": "identity_148276", "image": "DocVQA/images/nykh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is C. H. Criswell, Jr" } ], "clip_score": 24.8125, "loss": 3.212515115737915 }, { "id": "identity_68805", "image": "ScienceQA/images/8463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What body of water is west of California?" }, { "from": "gpt", "value": "The answer is Pacific Ocean" } ], "clip_score": 23.84375, "loss": 2.767462968826294 }, { "id": "identity_76574", "image": "AI2D/images/4136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the process that occurs in the chloroplast as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Photosynthesis" } ], "clip_score": 34.65625, "loss": 1.7022147178649902 }, { "id": "identity_194249", "image": "A-OKVQA/images/416981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.9375, "loss": 4.787684917449951 }, { "id": "identity_170699", "image": "TabMWP/images/20180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of red buttons per bag?" }, { "from": "gpt", "value": "The answer is 46" } ], "clip_score": 22.828125, "loss": 2.9301109313964844 }, { "id": "identity_170185", "image": "IconQA/images/84990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the twig is cut in half, what will be the length of each piece to the nearest eighth of an inch?" }, { "from": "gpt", "value": "The answer is 2.75 inches" } ], "clip_score": 35.125, "loss": 1.8217424154281616 }, { "id": "identity_54895", "image": "FigureQA/images/61719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis label value does the Medium Aqua line cross the Gold line?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 35.78125, "loss": 2.1216959953308105 }, { "id": "identity_193313", "image": "A-OKVQA/images/185834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many snowmen can be seen on the mantelpiece?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.359375, "loss": 2.5461795330047607 }, { "id": "identity_50076", "image": "FigureQA/images/77421.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Medium Purple greater than Light Seafoam at x-axis label 80?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.015625, "loss": 3.2145607471466064 }, { "id": "identity_274464", "image": "TextVQA/images/aeb3e59991e7351e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which island is this?" }, { "from": "gpt", "value": "The answer is mt. desert island" } ], "clip_score": 21.859375, "loss": 2.608652353286743 }, { "id": "identity_115021", "image": "A-OKVQA/images/216910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the fins on the surfboard?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 27.15625, "loss": 3.442345380783081 }, { "id": "identity_14006", "image": "TextVQA/images/f9d18252cce3fdd0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand of this plane/?" }, { "from": "gpt", "value": "The answer is europa pro lt" } ], "clip_score": 35.96875, "loss": 3.767758369445801 }, { "id": "identity_17606", "image": "FigureQA/images/83048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Orchid intersect Bubblegum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.265625, "loss": 3.590391159057617 }, { "id": "identity_50464", "image": "FigureQA/images/180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other colors represented in the pie chart besides Royal Blue and Dark Orchid?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 39.8125, "loss": 2.936993360519409 }, { "id": "identity_179163", "image": "TabMWP/images/26148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Marie decides to go to the downtown after visiting the art museum, how much time will she spend traveling on the shuttle in total?" }, { "from": "gpt", "value": "The answer is 150 minutes" } ], "clip_score": 25.90625, "loss": 2.406456232070923 }, { "id": "identity_130413", "image": "VQA-AS/images/13424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bench in the playground?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 31.125, "loss": 3.1500532627105713 }, { "id": "identity_292135", "image": "TabMWP/images/20500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the table provided, is there a shortage or surplus at a price of $235?\nOptions:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.734375, "loss": 3.4983763694763184 }, { "id": "identity_282836", "image": "IconQA/images/37399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of hearts?\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 5\n(E) 3" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 26.890625, "loss": 3.4823505878448486 }, { "id": "identity_135878", "image": "DocVQA/images/zzcw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the meeting mentioned in the document?" }, { "from": "gpt", "value": "The answer is Berlin, Germany" } ], "clip_score": 19.90625, "loss": 2.86486554145813 }, { "id": "identity_92138", "image": "IconQA/images/53800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of purple squares that have a white square border around them?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 35.71875, "loss": 1.8652770519256592 }, { "id": "identity_24166", "image": "CLEVR-Math/images/CLEVR_train_064630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 1 green metallic cylinders. How many green metallic cylinders are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.15625, "loss": 2.4882686138153076 }, { "id": "identity_195245", "image": "PMC-VQA/images/PMC8139247_f1_446974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape does the lesion resemble?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 26.78125, "loss": 3.7547402381896973 }, { "id": "identity_111115", "image": "PMC-VQA/images/PMC2895667_ppat-1000980-g007_68025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you estimate the density of cells in a given area of the image?" }, { "from": "gpt", "value": "The answer is The estimated cell density (provide the calculated density)" } ], "clip_score": 23.171875, "loss": 3.9561195373535156 }, { "id": "identity_295779", "image": "AI2D/images/3820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which region corresponds to the endosperm?\nChoices:\n(A) C\n(B) A\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.53125, "loss": 4.318211078643799 }, { "id": "identity_245027", "image": "PMC-VQA/images/PMC9229633_tomography-08-00115-f001_323519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific anatomical structures are being measured in the right images of each panel, and how do these measurements relate to the diagnosis of hydrocephalus?" }, { "from": "gpt", "value": "The answer is Temporal horns of lateral ventricles, indicating potential hydrocephalus" } ], "clip_score": 30.984375, "loss": 2.391894578933716 }, { "id": "identity_139377", "image": "DocVQA/images/jlxw0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many Task Forces are scheduled to present on Day 5?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.75, "loss": 2.5765531063079834 }, { "id": "identity_43157", "image": "FigureQA/images/91176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value that Royal Blue reaches on the graph?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 27.859375, "loss": 2.1869564056396484 }, { "id": "identity_40671", "image": "PlotQA/images/105394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the percentages of underweight children in all the countries shown?" }, { "from": "gpt", "value": "The answer is 107%" } ], "clip_score": 29.03125, "loss": 2.075406551361084 }, { "id": "identity_187893", "image": "TextVQA/images/27e2208a5295c38d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the denomination of the banknote?" }, { "from": "gpt", "value": "The answer is One dollar" } ], "clip_score": 21.53125, "loss": 3.8826372623443604 }, { "id": "identity_159250", "image": "Geometry3K/images/952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the trapezoid MNPQ is reflected across line PQ, what will be the area of the resulting figure?" }, { "from": "gpt", "value": "The answer is 720 square feet" } ], "clip_score": 36.96875, "loss": 1.8483638763427734 }, { "id": "identity_259713", "image": "VQA2.0/images/215616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of shoes is he selling?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.828125, "loss": 4.50266170501709 }, { "id": "identity_262765", "image": "VQA-AS/images/15952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the girl creating a mess?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.53125, "loss": 3.563821315765381 }, { "id": "identity_206294", "image": "A-OKVQA/images/264155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many slices of pizza have been taken from the whole pizza?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.453125, "loss": 2.245297908782959 }, { "id": "identity_285205", "image": "PMC-VQA/images/PMC3408502_pone-0041602-g004_147507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the figures that represent the same brain segment.\nChoices:\n(A) Figures B and C\n(B) Figures A and B\n(C) Figures A and C\n(D) Figures C and D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.7578125, "loss": 3.303187847137451 }, { "id": "identity_184361", "image": "VQA2.0/images/103352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many knobs are on the sink faucet?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.34375, "loss": 4.514920711517334 }, { "id": "identity_171179", "image": "TabMWP/images/27164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest values of Y?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 28.46875, "loss": 2.317108154296875 }, { "id": "identity_173352", "image": "IconQA/images/14074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the line in decimeters?" }, { "from": "gpt", "value": "The answer is 6.8 decimeters" } ], "clip_score": 30.921875, "loss": 1.8013694286346436 }, { "id": "identity_205688", "image": "PMC-VQA/images/PMC6341153_Fig1_425609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the solid arrow?" }, { "from": "gpt", "value": "The answer is Corpus callosum" } ], "clip_score": 28.84375, "loss": 2.682424306869507 }, { "id": "identity_179321", "image": "CLEVR-Math/images/CLEVR_train_041301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.734375, "loss": 2.476287603378296 }, { "id": "identity_144464", "image": "DocVQA/images/hmgd0079_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the fee for Environ in 1990?" }, { "from": "gpt", "value": "The answer is 23,000" } ], "clip_score": 24.390625, "loss": 1.6061123609542847 }, { "id": "identity_79087", "image": "UniGeo/images/1890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is the diameter of circle E and ∠BEC = 40°, what is the measure of ∠BAC?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 33.15625, "loss": 1.5700331926345825 }, { "id": "identity_16796", "image": "UniGeo/images/2891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB为⊙O直径,已知∠DCB=20°,则∠DBA为()\nChoices:\n(A) 50°\n(B) 20°\n(C) 60°\n(D) 70°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.6875, "loss": 2.601348400115967 }, { "id": "identity_313385", "image": "ScienceQA/images/11337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question can William's experiment best address?\nChoices:\n(A) Do steel nails rust in fewer days when submerged in a large volume of liquid compared to a small volume?\n(B) Do steel nails take fewer days to rust in water compared to vinegar?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.140625, "loss": 3.710787534713745 }, { "id": "identity_77617", "image": "ScienceQA/images/16803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the state that borders the highlighted state to the south?" }, { "from": "gpt", "value": "The answer is Massachusetts" } ], "clip_score": 30.96875, "loss": 4.053775310516357 }, { "id": "identity_89033", "image": "Geometry3K/images/1486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In circle R, if SU = 20 and m∠YXZ = 45°, what is the length of YZ?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 33.09375, "loss": 1.8382844924926758 }, { "id": "identity_294072", "image": "PMC-VQA/images/PMC6766793_molecules-24-03215-f005_43086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the white asterisk (*) in the figure provided?\nChoices:\n(A) Substrate binding site \n(B) Active site \n(C) Site for water exclusion \n(D) Site for allosteric regulation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.0, "loss": 4.971027374267578 }, { "id": "identity_35603", "image": "TabMWP/images/17194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each pizza place has 10 booths. How many booths are in 4 pizza places?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 26.578125, "loss": 1.6266692876815796 }, { "id": "identity_72839", "image": "TQA/images/question_images/human_system_digestive_3672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organ is responsible for producing digestive enzymes and hormones?" }, { "from": "gpt", "value": "The answer is Pancreas" } ], "clip_score": 29.0, "loss": 2.039970636367798 }, { "id": "identity_272370", "image": "PMC-VQA/images/PMC5415772_Fig1_160019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the right panel image, which part of the splenic artery is impacted by the aneurysm? Is it the distal third, proximal third, middle third, or is it unable to determine from the image?\nChoices:\n(A) distal third \n(B) proximal third \n(C) middle third \n(D) unable to determine from image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.359375, "loss": 4.020215034484863 }, { "id": "identity_63897", "image": "ScienceQA/images/17796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the smallest land area among the highlighted ones?" }, { "from": "gpt", "value": "The answer is Delaware" } ], "clip_score": 27.765625, "loss": 2.335247755050659 }, { "id": "identity_247117", "image": "IconQA/images/3829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangles in the image, and what is the sum of the internal angles of all the triangles combined?" }, { "from": "gpt", "value": "The answer is 540 degrees" } ], "clip_score": 29.40625, "loss": 3.202235221862793 }, { "id": "identity_185515", "image": "PMC-VQA/images/PMC4137544_fig1_314138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of a pathological finding in this MRI image?" }, { "from": "gpt", "value": "The answer is Yes, indicated by the arrow" } ], "clip_score": 32.46875, "loss": 2.7043135166168213 }, { "id": "identity_293631", "image": "TabMWP/images/3853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money did Sandra spend at the store on coriander if she bought 1/4 of a kilogram?" }, { "from": "gpt", "value": "The answer is 0.50" } ], "clip_score": 30.90625, "loss": 2.10272479057312 }, { "id": "identity_108432", "image": "VQA-AS/images/12795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pet is sitting on the person's lap?" }, { "from": "gpt", "value": "The answer is A cat" } ], "clip_score": 26.234375, "loss": 3.592761754989624 }, { "id": "identity_279751", "image": "VizWiz/images/VizWiz_train_00014569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which vocabulary word is present in the first item?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.984375, "loss": 4.201694011688232 }, { "id": "identity_167037", "image": "TabMWP/images/921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the vendor aims to have fewer than 15 leftover loaves each day, on how many days did they meet this goal?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 41.65625, "loss": 2.842008590698242 }, { "id": "identity_39302", "image": "DocVQA/images/gzbn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the author mentioned under the author's archive?" }, { "from": "gpt", "value": "The answer is Senior Director, Regulatory Science Policy" } ], "clip_score": 23.84375, "loss": 3.2480361461639404 }, { "id": "identity_81971", "image": "Geometry3K/images/1394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment DF?" }, { "from": "gpt", "value": "The answer is DF = 5.59 (rounded to two decimal places)" } ], "clip_score": 30.15625, "loss": 1.7593721151351929 }, { "id": "identity_119076", "image": "PMC-VQA/images/PMC3411849_pone-0042850-g007_148512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the asterisk (*) in the image?" }, { "from": "gpt", "value": "The answer is Area of interest or abnormality" } ], "clip_score": 23.015625, "loss": 3.4974966049194336 }, { "id": "identity_322655", "image": "TextVQA/images/c8d19b2d192d6fcf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is in the green bottle?" }, { "from": "gpt", "value": "The answer is 7up" } ], "clip_score": 21.53125, "loss": 2.5761852264404297 }, { "id": "identity_307285", "image": "DocVQA/images/jzhd0227_28.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number mentioned in this document?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.0, "loss": 2.9371516704559326 }, { "id": "identity_28063", "image": "TextVQA/images/0116dcf25359ea13.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what letter is on the green candy's chest?" }, { "from": "gpt", "value": "The answer is letter m" } ], "clip_score": 21.265625, "loss": 3.4459123611450195 }, { "id": "identity_279768", "image": "VQA2.0/images/299794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the farmer have in his cart?" }, { "from": "gpt", "value": "The answer is banana" } ], "clip_score": 28.984375, "loss": 2.9759576320648193 }, { "id": "identity_163551", "image": "GeoQA+/images/2015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, if ∠D = 150°, what is the length of diagonal BD if AB = 10 units?" }, { "from": "gpt", "value": "The answer is 2 * 10 * sin(30°)" } ], "clip_score": 33.5625, "loss": 1.9582537412643433 }, { "id": "identity_16172", "image": "DocVQA/images/trkm0227_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cost of Agricultural Experiment Station?" }, { "from": "gpt", "value": "The answer is 12,100" } ], "clip_score": 16.078125, "loss": 2.2492079734802246 }, { "id": "identity_220720", "image": "A-OKVQA/images/161231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the platform number for the train on the right?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 24.046875, "loss": 2.3019587993621826 }, { "id": "identity_145579", "image": "DocVQA/images/lkbl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the recipient of the memo?" }, { "from": "gpt", "value": "The answer is Dr. Paul Saltman" } ], "clip_score": 26.28125, "loss": 2.6635234355926514 }, { "id": "identity_290392", "image": "ScienceQA/images/5028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.0, "loss": 4.145320415496826 }, { "id": "identity_53088", "image": "ChartQA/images/two_col_40255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage increase in revenue from 2017 to 2018?" }, { "from": "gpt", "value": "The answer is 3.57%" } ], "clip_score": 26.890625, "loss": 1.956750512123108 }, { "id": "identity_165784", "image": "UniGeo/images/1876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Xiao Ming continues to walk in the same direction from point C for the same distance as from A to B, what will be the angle between his starting direction at A and his final direction at the new point D?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 35.3125, "loss": 1.688328504562378 }, { "id": "identity_258677", "image": "PMC-VQA/images/PMC2500027_F1_26401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of a T2-weighted MRI scan, and how do they help in identifying the type of scan obtained on day 48?" }, { "from": "gpt", "value": "The answer is T2-weighted MRI scan" } ], "clip_score": 31.875, "loss": 2.081683397293091 }, { "id": "identity_252637", "image": "PMC-VQA/images/PMC7779423_fig0009_333740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structures highlighted in the enlarged region of the image and describe their primary functions" }, { "from": "gpt", "value": "The answer is Pons: relays signals, regulates breathing; Cerebellum: coordinates movement, balance" } ], "clip_score": 29.296875, "loss": 2.84285306930542 }, { "id": "identity_255941", "image": "Geometry3K/images/1156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of quadrilateral MNPQ is 180 square units, find the area of quadrilateral XYZW" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 30.671875, "loss": 2.682072877883911 }, { "id": "identity_312880", "image": "TabMWP/images/32026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Observe the table. Then respond to the question. At a price of $735, is there a shortage or a surplus?\nChoices:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.6875, "loss": 3.6167778968811035 }, { "id": "identity_244096", "image": "PMC-VQA/images/PMC9393506_f1_377421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are adjacent to the lesion located in the sphenoid bone, and how might this affect surrounding areas?" }, { "from": "gpt", "value": "The answer is Sphenoid sinus, nasal cavity, brain; sinus issues, nasal obstruction, neurological symptoms" } ], "clip_score": 27.796875, "loss": 2.324570894241333 }, { "id": "identity_98585", "image": "IconQA/images/63648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are green but not circles?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.03125, "loss": 2.508901834487915 }, { "id": "identity_168831", "image": "IconQA/images/11944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one orange marble is removed, what fraction of the original number of marbles remains?" }, { "from": "gpt", "value": "The answer is 4/5" } ], "clip_score": 32.25, "loss": 1.8615401983261108 }, { "id": "identity_47268", "image": "FigureQA/images/70540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Web Green on the y-axis?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 26.8125, "loss": 2.5409791469573975 }, { "id": "identity_180535", "image": "TabMWP/images/11730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cost of the five items listed?" }, { "from": "gpt", "value": "The answer is $3,385.00" } ], "clip_score": 20.140625, "loss": 1.6360962390899658 }, { "id": "identity_90689", "image": "UniGeo/images/3400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠ABC = 71° and ∠CAB = 53°, what is the measure of ∠ADB?" }, { "from": "gpt", "value": "The answer is 56°" } ], "clip_score": 36.59375, "loss": 1.801222801208496 }, { "id": "identity_225514", "image": "AI2D/images/2071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If both the Blue Shark and the Chub Mackerel were to disappear, which organism would experience the most significant population increase?" }, { "from": "gpt", "value": "The answer is Northern Anchovy" } ], "clip_score": 31.25, "loss": 2.1689071655273438 }, { "id": "identity_178512", "image": "TabMWP/images/14741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more money does Luther need to buy a watering can?" }, { "from": "gpt", "value": "The answer is Luther needs $29.47 more" } ], "clip_score": 30.796875, "loss": 2.9506287574768066 }, { "id": "identity_81111", "image": "Geometry3K/images/347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment PR?" }, { "from": "gpt", "value": "The answer is 10√2 or approximately 14.14" } ], "clip_score": 28.4375, "loss": 1.942179560661316 }, { "id": "identity_191950", "image": "PMC-VQA/images/PMC1821330_F2_10049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is shown in the upper panel of the image?" }, { "from": "gpt", "value": "The answer is Epithelial tissue" } ], "clip_score": 30.328125, "loss": 2.1200954914093018 }, { "id": "identity_245375", "image": "Super-CLEVR/images/superCLEVR_new_000829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels, and what is the total number of wheels among all these vehicles?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 24.28125, "loss": 3.8726842403411865 }, { "id": "identity_173029", "image": "IconQA/images/62906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes have more than 6 sides?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 30.078125, "loss": 2.4691367149353027 }, { "id": "identity_120255", "image": "VQA-AS/images/15271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the plant pot on the table?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 19.40625, "loss": 3.545466661453247 }, { "id": "identity_18331", "image": "VQA-AS/images/19989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on top of the stool?" }, { "from": "gpt", "value": "The answer is plant" } ], "clip_score": 21.421875, "loss": 3.112726926803589 }, { "id": "identity_39884", "image": "DVQA/images/bar_train_00006479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined value of all categories in the \"scheme\" group?" }, { "from": "gpt", "value": "The answer is Approximately 32" } ], "clip_score": 23.828125, "loss": 3.1217660903930664 }, { "id": "identity_292122", "image": "TextVQA/images/b48751918cd7c627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of maps available?" }, { "from": "gpt", "value": "The answer is 122" } ], "clip_score": 13.4140625, "loss": 1.8615351915359497 }, { "id": "identity_191662", "image": "A-OKVQA/images/122350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the straw in the blue cup?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.1875, "loss": 3.4960129261016846 }, { "id": "identity_237785", "image": "TQA/images/question_images/periodic_table_7358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which elements in the periodic table are classified as Noble Gases, and what are their atomic numbers?" }, { "from": "gpt", "value": "The answer is Helium (2), Neon (10), Argon (18), Krypton (36), Xenon (54), Radon (86), Oganesson (118)" } ], "clip_score": 29.875, "loss": 0.6629753112792969 }, { "id": "identity_11038", "image": "FigureQA/images/27333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green Yellow the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.171875, "loss": 3.9312081336975098 }, { "id": "identity_6711", "image": "VQA2.0/images/436046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is falling from the sky?" }, { "from": "gpt", "value": "The answer is snow" } ], "clip_score": 19.671875, "loss": 2.4990642070770264 }, { "id": "identity_290160", "image": "VQA-AS/images/5184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the girl throwing the boy a football?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.953125, "loss": 3.4864814281463623 }, { "id": "identity_107110", "image": "Super-CLEVR/images/superCLEVR_new_000499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large vehicle that has multiple windows on its side?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 21.109375, "loss": 3.7104339599609375 }, { "id": "identity_77544", "image": "TQA/images/question_images/ocean_zones_7138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bottom is found in the continental slope area according to the image?" }, { "from": "gpt", "value": "The answer is Muddy bottom" } ], "clip_score": 21.265625, "loss": 2.558669090270996 }, { "id": "identity_169942", "image": "IconQA/images/19843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one bucket is removed from the image, how many objects will remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.90625, "loss": 2.264040946960449 }, { "id": "identity_242013", "image": "ScienceQA/images/16710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scientific name of this organism, and what are two key identifying features that distinguish it from other similar species?" }, { "from": "gpt", "value": "The answer is Ardea herodias, long sharp bill, blue-gray plumage" } ], "clip_score": 28.328125, "loss": 2.7714121341705322 }, { "id": "identity_22874", "image": "DVQA/images/bar_train_00083013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the largest summed value?" }, { "from": "gpt", "value": "The answer is club" } ], "clip_score": 20.78125, "loss": 3.202632427215576 }, { "id": "identity_136942", "image": "ChartQA/images/16354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Excellent/Good in August 2011?" }, { "from": "gpt", "value": "The answer is 49" } ], "clip_score": 19.09375, "loss": 2.719583749771118 }, { "id": "identity_129750", "image": "A-OKVQA/images/134815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the countertop?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 24.71875, "loss": 3.130079746246338 }, { "id": "identity_337594", "image": "TabMWP/images/26201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: An author recorded how many words were written over the past 3 days. How many words in total were written on Wednesday and Friday?" }, { "from": "gpt", "value": "The answer is 7,270" } ], "clip_score": 39.0625, "loss": 2.4742941856384277 }, { "id": "identity_224475", "image": "Super-CLEVR/images/superCLEVR_new_000638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rubber things are there in total, and what are their sizes?" }, { "from": "gpt", "value": "The answer is 2 large rubber things" } ], "clip_score": 16.21875, "loss": 3.02927827835083 }, { "id": "identity_329811", "image": "TextVQA/images/2d7c6aae590d63f8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the jersey of the person with the pink arm band?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 15.6640625, "loss": 2.3255715370178223 }, { "id": "identity_43881", "image": "DocVQA/images/zhfg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of pages transmitted?" }, { "from": "gpt", "value": "The answer is 02" } ], "clip_score": 17.78125, "loss": 2.833099126815796 }, { "id": "identity_308882", "image": "A-OKVQA/images/27975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is tattoo allowed here?\nChoices:\n(A) maybe no\n(B) no\n(C) absolutely no\n(D) yes" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.1796875, "loss": 3.626319408416748 }, { "id": "identity_44630", "image": "DVQA/images/bar_train_00093155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold by the store \"sky\" for all items?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 26.546875, "loss": 2.3281822204589844 }, { "id": "identity_44667", "image": "DocVQA/images/kzbn0226_40.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What intrinsic factors are considered for identifying susceptible subpopulations according to the document?" }, { "from": "gpt", "value": "The answer is life stage, reproductive status, gender, genetic traits" } ], "clip_score": 21.15625, "loss": 2.9722166061401367 }, { "id": "identity_245731", "image": "IconQA/images/46565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the digits of the number shown?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 22.171875, "loss": 1.8798681497573853 }, { "id": "identity_176584", "image": "TabMWP/images/36991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you buy two orders of biscuits and gravy with a $10 bill?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.90625, "loss": 2.730275869369507 }, { "id": "identity_319210", "image": "VQA2.0/images/352213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the background?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 14.5703125, "loss": 2.2171900272369385 }, { "id": "identity_87380", "image": "UniGeo/images/1307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AB is a diameter of the circle with center O and ∠ABC = 55°, what is the measure of ∠BAC?" }, { "from": "gpt", "value": "The answer is 55°" } ], "clip_score": 36.5, "loss": 1.98578941822052 }, { "id": "identity_154821", "image": "ScienceQA/images/16625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which direction does the highlighted country face towards the Caribbean Sea?" }, { "from": "gpt", "value": "The answer is South" } ], "clip_score": 33.5625, "loss": 3.19596529006958 }, { "id": "identity_264976", "image": "UniGeo/images/2750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, in quadrilateral ABCD, AB = 10, AD = 6, EC is perpendicular to AE, FC is perpendicular to AB, and CF = 3. What is the length of CE?\nChoices:\n(A) 5\n(B) 6\n(C) 8\n(D) 10" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.125, "loss": 2.9121711254119873 }, { "id": "identity_243509", "image": "PMC-VQA/images/PMC7925057_FIG6_382183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the depth of fluid in the joint for both the left and right ultrasound images and determine the difference in depth between them" }, { "from": "gpt", "value": "The answer is The difference in depth between the left and right ultrasound images" } ], "clip_score": 30.265625, "loss": 1.5611072778701782 }, { "id": "identity_144228", "image": "DVQA/images/bar_train_00184081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm milk in the dataset beach?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 27.3125, "loss": 2.9228389263153076 }, { "id": "identity_178600", "image": "IconQA/images/55771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many edges does this shape have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.421875, "loss": 2.043437957763672 }, { "id": "identity_143352", "image": "DocVQA/images/hpbg0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the Top-to-Top Meeting mentioned in the letter?" }, { "from": "gpt", "value": "The answer is October 9" } ], "clip_score": 23.9375, "loss": 4.107877254486084 }, { "id": "identity_97000", "image": "TabMWP/images/6473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average score of the math quiz?" }, { "from": "gpt", "value": "The answer is 7.68" } ], "clip_score": 34.875, "loss": 2.2644381523132324 }, { "id": "identity_173124", "image": "TabMWP/images/5732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two bow ties are selected at random, what is the probability that both are made of cotton?" }, { "from": "gpt", "value": "The answer is 7/15 * 6/14 = 42/210 = 1/5" } ], "clip_score": 30.0625, "loss": 1.329539179801941 }, { "id": "identity_156124", "image": "TQA/images/question_images/earth_eclipses_695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the eclipse where a ring of the sun is visible around the moon?" }, { "from": "gpt", "value": "The answer is Annular" } ], "clip_score": 18.03125, "loss": 2.737616777420044 }, { "id": "identity_79874", "image": "GeoQA+/images/1031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle EFD?" }, { "from": "gpt", "value": "The answer is -40 degrees" } ], "clip_score": 31.890625, "loss": 2.8392930030822754 }, { "id": "identity_131784", "image": "VQA2.0/images/352213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the two people engaged in?" }, { "from": "gpt", "value": "The answer is Cross-country skiing" } ], "clip_score": 29.359375, "loss": 2.229893684387207 }, { "id": "identity_69865", "image": "ScienceQA/images/9800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Kentucky" } ], "clip_score": 31.078125, "loss": 3.3729751110076904 }, { "id": "identity_326560", "image": "TabMWP/images/33260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A journalism student counted the number of pages in several major magazines. How many of these magazines had at least 30 pages?" }, { "from": "gpt", "value": "The answer is 21" } ], "clip_score": 26.0625, "loss": 2.476886034011841 }, { "id": "identity_318836", "image": "Geometry3K/images/561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If \\( x = 7 \\sqrt{3} \\), determine \\( b \\).\nChoices:\n(A) 7\n(B) 7 \\sqrt{3}\n(C) 14 \\sqrt{3}\n(D) 42" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.625, "loss": 2.631859302520752 }, { "id": "identity_161042", "image": "Geometry3K/images/1028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the triangle RTS?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.1875, "loss": 2.4393742084503174 }, { "id": "identity_130032", "image": "PMC-VQA/images/PMC8122566_Fig1_440882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique is used in this image?" }, { "from": "gpt", "value": "The answer is X-ray" } ], "clip_score": 28.875, "loss": 2.007652997970581 }, { "id": "identity_170239", "image": "TabMWP/images/8545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you have $1.00, how many pieces of polka-dotted paper can you buy?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 29.5, "loss": 2.156033992767334 }, { "id": "identity_102553", "image": "CLEVR-Math/images/CLEVR_train_022030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cylinders are there in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.109375, "loss": 2.70459246635437 }, { "id": "identity_299432", "image": "DocVQA/images/mtkg0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the document?" }, { "from": "gpt", "value": "The answer is literature cited" } ], "clip_score": 18.25, "loss": 3.5566914081573486 }, { "id": "identity_100521", "image": "IconQA/images/89810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the shape in the image a polygon?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.859375, "loss": 3.7581963539123535 }, { "id": "identity_273541", "image": "FigureQA/images/74537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the roughest: Dark Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.2109375, "loss": 4.684676170349121 }, { "id": "identity_264096", "image": "GeoQA+/images/3519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, where angle A is 90°, point D lies on side AC, and DE is parallel to BC. If angle 1 is 155°, what is the measure of angle B?\n(A) 45°\n(B) 55°\n(C) 65°\n(D) 75°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 2.4372363090515137 }, { "id": "identity_205119", "image": "PMC-VQA/images/PMC6572793_Fig6_495030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are shown in the image?" }, { "from": "gpt", "value": "The answer is Mammalian cells" } ], "clip_score": 26.9375, "loss": 2.973207950592041 }, { "id": "identity_230154", "image": "DVQA/images/bar_train_00060369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the 'rush' bars for all categories combined?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 22.40625, "loss": 2.3844077587127686 }, { "id": "identity_221103", "image": "A-OKVQA/images/56116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 17.5625, "loss": 4.579247951507568 }, { "id": "identity_92328", "image": "IconQA/images/60525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of minutes from 1:30 PM to 3:00 PM?" }, { "from": "gpt", "value": "The answer is 90 minutes" } ], "clip_score": 29.21875, "loss": 1.630448341369629 }, { "id": "identity_242645", "image": "ScienceQA/images/20615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the top magnet is rotated 180 degrees around its center, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.234375, "loss": 3.038039445877075 }, { "id": "identity_160896", "image": "GeoQA+/images/3658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is Angle AOC = 45 degrees" } ], "clip_score": 30.90625, "loss": 1.732217788696289 }, { "id": "identity_149882", "image": "DocVQA/images/qspp0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals contributed to this report?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.4375, "loss": 2.330022096633911 }, { "id": "identity_274491", "image": "AI2D/images/2710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what time of day does the letter P typically occur?\nChoices:\n(A) midnight\n(B) noon\n(C) sunset\n(D) sunrise" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.828125, "loss": 4.459048748016357 }, { "id": "identity_65491", "image": "AI2D/images/1086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the outer edge of the leaf?" }, { "from": "gpt", "value": "The answer is Margin" } ], "clip_score": 32.5, "loss": 2.8825337886810303 }, { "id": "identity_231869", "image": "VQA2.0/images/48759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the vehicle closest to the woman crossing the street?" }, { "from": "gpt", "value": "The answer is orange" } ], "clip_score": 26.140625, "loss": 3.9514853954315186 }, { "id": "identity_35599", "image": "PMC-VQA/images/PMC9136286_F1_290541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the corpus callosum is the lesion showing patchy enhancement in image B? \nChoices:\n(A) Left genu, \n(B) Right splenium, \n(C) Middle body, \n(D) Anterior portion. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.234375, "loss": 4.098211288452148 }, { "id": "identity_306299", "image": "PMC-VQA/images/PMC4402572_fig10_379167.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image?\nChoices:\n(A) Face mask \n(B) Brain mask \n(C) Heart mask \n(D) Liver mask" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.328125, "loss": 4.169268608093262 }, { "id": "identity_126464", "image": "VizWiz/images/VizWiz_train_00016425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many application icons have a red notification badge?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.90625, "loss": 2.427544355392456 }, { "id": "identity_36176", "image": "FigureQA/images/20903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua greater than Forest Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.859375, "loss": 3.6898932456970215 }, { "id": "identity_142776", "image": "PlotQA/images/19757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the aid flow from Denmark to Korea (Democratic) ever exceed 1000000 US$?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.90625, "loss": 2.8675754070281982 }, { "id": "identity_199268", "image": "A-OKVQA/images/293081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a clear sky or clouds in the background?" }, { "from": "gpt", "value": "The answer is Clear sky" } ], "clip_score": 14.71875, "loss": 2.703829765319824 }, { "id": "identity_53435", "image": "DVQA/images/bar_train_00117237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between 'luck' and 'folk' on 'estate'?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.484375, "loss": 2.876636505126953 }, { "id": "identity_188480", "image": "A-OKVQA/images/326726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is advertised on the building in the background?" }, { "from": "gpt", "value": "The answer is Adidas" } ], "clip_score": 21.265625, "loss": 2.805990219116211 }, { "id": "identity_78715", "image": "AI2D/images/4804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which figure depicts a branch with leaves only at the bottom?" }, { "from": "gpt", "value": "The answer is Figure B" } ], "clip_score": 28.890625, "loss": 2.7694156169891357 }, { "id": "identity_253766", "image": "A-OKVQA/images/80078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of bananas are displayed in this section, and what is the total price if you buy one of each type?" }, { "from": "gpt", "value": "The answer is 5 types, $4.73" } ], "clip_score": 34.375, "loss": 3.0823771953582764 }, { "id": "identity_174071", "image": "IconQA/images/58890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this shape have any edges?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.0625, "loss": 3.028224229812622 }, { "id": "identity_239835", "image": "FigureQA/images/87855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the maximum area under the curve, and what is the difference in the area under the curve between Light Gold and this color?" }, { "from": "gpt", "value": "The answer is Dark Gold, approximately 1" } ], "clip_score": 33.6875, "loss": 4.059887409210205 }, { "id": "identity_139367", "image": "DVQA/images/bar_train_00019896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any algorithm that has consistent accuracy across all datasets?" }, { "from": "gpt", "value": "The answer is No, all algorithms have varying accuracy" } ], "clip_score": 38.15625, "loss": 2.494741916656494 }, { "id": "identity_122654", "image": "VQA-RAD/images/synpic26697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign indicated by the black arrow in the image?" }, { "from": "gpt", "value": "The answer is Rigler's sign" } ], "clip_score": 21.453125, "loss": 3.0992133617401123 }, { "id": "identity_201646", "image": "PMC-VQA/images/PMC6760372_f1-arh-23-3-187_41504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain image?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 32.0625, "loss": 3.6182758808135986 }, { "id": "identity_231595", "image": "VizWiz/images/VizWiz_train_00003698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times is the word \"coffee\" written on the coffee cup, and what is the total number of words written on the cup?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.0, "loss": 4.416838645935059 }, { "id": "identity_49689", "image": "PlotQA/images/154712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the amount spent by Congo Democratic greater than that of Ethiopia?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.1875, "loss": 3.2313246726989746 }, { "id": "identity_126976", "image": "PMC-VQA/images/PMC4151864_fig5_317318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to capture the image shown?" }, { "from": "gpt", "value": "The answer is Ultrasound" } ], "clip_score": 34.5, "loss": 2.173962354660034 }, { "id": "identity_293468", "image": "FigureQA/images/12362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Periwinkle or Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.59375, "loss": 4.569521903991699 }, { "id": "identity_276050", "image": "A-OKVQA/images/165492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of animal is required to operate this boat? \nOptions:\n(A) elephant\n(B) cat\n(C) dog\n(D) human" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.234375, "loss": 4.045424461364746 }, { "id": "identity_105176", "image": "Super-CLEVR/images/superCLEVR_new_000790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more motorcycles or buses in the image?" }, { "from": "gpt", "value": "The answer is Buses" } ], "clip_score": 23.09375, "loss": 2.3916540145874023 }, { "id": "identity_81624", "image": "GeoQA+/images/2058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of triangle ABC is 12 and E is the midpoint of AD, what is the area of triangle ADE?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.53125, "loss": 2.278162717819214 }, { "id": "identity_204030", "image": "A-OKVQA/images/241728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Eleven" } ], "clip_score": 16.6875, "loss": 3.641651153564453 }, { "id": "identity_29735", "image": "GeoQA+/images/2646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,将边长为5个单位长度的等边△ABC沿边BC向右平移4个单位得到△A′B′C′,则线段B′C的长为()\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.65625, "loss": 2.8863654136657715 }, { "id": "identity_241907", "image": "DVQA/images/bar_train_00071482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the categories represented by darkorange and green colors for the objects 'trust' and 'effort'?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 30.375, "loss": 2.157360792160034 }, { "id": "identity_48776", "image": "PlotQA/images/86402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the percentage of ICT goods imported between India and Namibia in 2010?" }, { "from": "gpt", "value": "The answer is 4%" } ], "clip_score": 26.25, "loss": 2.3123652935028076 }, { "id": "identity_113346", "image": "TextVQA/images/a831d550a637bb73.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What musical instrument is depicted on the sign to the left of the cafe entrance?" }, { "from": "gpt", "value": "The answer is Guitar" } ], "clip_score": 24.8125, "loss": 2.4913339614868164 }, { "id": "identity_319303", "image": "GeoQA+/images/2172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD为⊙O的内接四边形,已知∠BOD=140°,则∠BCD的度数是多少?\n选择:\n(A) 100°\n(B) 110°\n(C) 120°\n(D) 140°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.859375, "loss": 2.436896562576294 }, { "id": "identity_286869", "image": "A-OKVQA/images/458308.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of race is being held? \nOptions:\n(A) sheep racing\n(B) horse racing\n(C) cat racing\n(D) dog racing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.921875, "loss": 4.157524108886719 }, { "id": "identity_39116", "image": "DVQA/images/bar_train_00170159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the 'belief' category across all bars?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 22.703125, "loss": 2.3686110973358154 }, { "id": "identity_147496", "image": "DocVQA/images/hjkm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the amount mentioned in the letter?" }, { "from": "gpt", "value": "The answer is $25,000" } ], "clip_score": 19.984375, "loss": 1.8893917798995972 }, { "id": "identity_61577", "image": "FigureQA/images/50594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate ratio of Bubblegum to Green Yellow?" }, { "from": "gpt", "value": "The answer is 11:9" } ], "clip_score": 35.9375, "loss": 2.279541015625 }, { "id": "identity_227992", "image": "IconQA/images/26254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many orange shapes are not squares?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.109375, "loss": 2.6390810012817383 }, { "id": "identity_188566", "image": "VQA2.0/images/351422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is taking place?" }, { "from": "gpt", "value": "The answer is Horse racing" } ], "clip_score": 30.234375, "loss": 2.7064096927642822 }, { "id": "identity_81834", "image": "UniGeo/images/2196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the shaded sectors in circles A, B, and C are all equal in area, what is the measure of the central angle for each sector?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 31.015625, "loss": 1.7345836162567139 }, { "id": "identity_88983", "image": "UniGeo/images/1111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠BOD = 120° in the inscribed quadrilateral ABCD, what is the measure of ∠BCD?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 34.46875, "loss": 1.674647331237793 }, { "id": "identity_179624", "image": "TabMWP/images/16175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest restaurant rating recorded in the stem-and-leaf plot?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 42.3125, "loss": 3.137016773223877 }, { "id": "identity_207222", "image": "Super-CLEVR/images/superCLEVR_new_000785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 21.890625, "loss": 3.9185097217559814 }, { "id": "identity_69510", "image": "ScienceQA/images/13994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of the items shown in the image is not a solid?" }, { "from": "gpt", "value": "The answer is Ocean water" } ], "clip_score": 22.21875, "loss": 2.3069560527801514 }, { "id": "identity_106078", "image": "TextVQA/images/6cf56d2782c00d9f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella in the image?" }, { "from": "gpt", "value": "The answer is Red and yellow" } ], "clip_score": 16.921875, "loss": 3.0003068447113037 }, { "id": "identity_225754", "image": "Super-CLEVR/images/superCLEVR_new_000553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are not blue?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 18.125, "loss": 4.245324611663818 }, { "id": "identity_90433", "image": "UniGeo/images/1691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB为⊙O的直径,C为⊙O外一点,过点C作⊙O的切线,切点为B,连接AC交⊙O于点D,∠C=40°,求∠AED的度数是多少?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 26.234375, "loss": 1.7269818782806396 }, { "id": "identity_218108", "image": "A-OKVQA/images/12145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the flag being waved on the right side of the image?" }, { "from": "gpt", "value": "The answer is Blue with a red stripe" } ], "clip_score": 21.578125, "loss": 2.9745638370513916 }, { "id": "identity_137144", "image": "DocVQA/images/xkbl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Principal Investigator(s) listed in the document?" }, { "from": "gpt", "value": "The answer is Paul Saltman(s)" } ], "clip_score": 26.21875, "loss": 3.96730375289917 }, { "id": "identity_91355", "image": "Geometry3K/images/706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 5 in the figure?" }, { "from": "gpt", "value": "The answer is 56 degrees" } ], "clip_score": 31.484375, "loss": 2.4075264930725098 }, { "id": "identity_316483", "image": "A-OKVQA/images/447574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the food items is most likely to be eaten with utensils?\nChoices:\n(A) none\n(B) bread\n(C) soup\n(D) sandwich" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.15625, "loss": 3.8110790252685547 }, { "id": "identity_97", "image": "FigureQA/images/24597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.859375, "loss": 3.842097520828247 }, { "id": "identity_192768", "image": "A-OKVQA/images/126204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the smartphone on the table?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 15.2734375, "loss": 3.054480791091919 }, { "id": "identity_253465", "image": "A-OKVQA/images/495117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which option provides a sheltered and mobile place for people to sleep, and why is it more suitable than the other options?" }, { "from": "gpt", "value": "The answer is trailer" } ], "clip_score": 14.890625, "loss": 3.268519163131714 }, { "id": "identity_159143", "image": "GeoQA+/images/5106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that BC is the diameter of circle O and ∠AOC = 60°, what is the measure of ∠BOC?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 38.78125, "loss": 1.5808866024017334 }, { "id": "identity_48657", "image": "FigureQA/images/15298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color Violet Red the second largest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 37.53125, "loss": 3.0725128650665283 }, { "id": "identity_157026", "image": "ScienceQA/images/6134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of soil texture appears to be present in the image?" }, { "from": "gpt", "value": "The answer is Loamy soil" } ], "clip_score": 24.40625, "loss": 3.2726833820343018 }, { "id": "identity_172775", "image": "IconQA/images/46089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sections in the shape?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 29.8125, "loss": 1.9206209182739258 }, { "id": "identity_291153", "image": "PMC-VQA/images/PMC7541359_Fig2_267731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the consensus contours in the image? \nChoices:\n(A) Contours from individual physicians \n(B) Scans from MRI \n(C) Contours of the target volume \n(D) None of the options provided" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.65625, "loss": 4.526276111602783 }, { "id": "identity_73415", "image": "AI2D/images/3690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organs are involved in the digestion process as per the image?" }, { "from": "gpt", "value": "The answer is Ten" } ], "clip_score": 31.40625, "loss": 4.0740156173706055 }, { "id": "identity_122643", "image": "A-OKVQA/images/114744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible under the awning in the image?" }, { "from": "gpt", "value": "The answer is There are two people" } ], "clip_score": 23.578125, "loss": 3.438361167907715 }, { "id": "identity_238073", "image": "TabMWP/images/8211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average score of the students who scored 50 points or more?" }, { "from": "gpt", "value": "The answer is 71.6" } ], "clip_score": 30.234375, "loss": 2.3660149574279785 }, { "id": "identity_338305", "image": "PMC-VQA/images/PMC7506999_anec12731-fig-0003_258656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the defects in perfusion found?\nChoices:\n(A) Lower lobes\n(B) Right lung\n(C) Upper lobes\n(D) Left lung" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.90625, "loss": 3.434906482696533 }, { "id": "identity_291125", "image": "ChartQA/images/two_col_81371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what point did the export reach its highest level?" }, { "from": "gpt", "value": "The answer is 2008" } ], "clip_score": 22.5, "loss": 1.5196746587753296 }, { "id": "identity_78380", "image": "AI2D/images/3083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'Cilia' refer to in the diagram, and what is its purpose in the cell?" }, { "from": "gpt", "value": "The answer is Cilia, movement and sensory functions" } ], "clip_score": 29.703125, "loss": 3.462862253189087 }, { "id": "identity_141723", "image": "DocVQA/images/ntnk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the reception on Wednesday, July 7?" }, { "from": "gpt", "value": "The answer is Park Lane Hotel, 36 Central Park South" } ], "clip_score": 23.890625, "loss": 1.9629393815994263 }, { "id": "identity_204916", "image": "VizWiz/images/VizWiz_train_00014339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side dishes are included in the frozen dinner?" }, { "from": "gpt", "value": "The answer is Stuffing and Vegetables" } ], "clip_score": 23.375, "loss": 2.2166006565093994 }, { "id": "identity_234932", "image": "PlotQA/images/38380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average annual expenditure on health care as a percentage of GDP for Least developed countries and Middle East & North Africa (all income levels) over the years 1995, 1996, and 1997, and which region has the higher average expenditure?" }, { "from": "gpt", "value": "The answer is Middle East & North Africa has the higher average expenditure" } ], "clip_score": 36.40625, "loss": 1.1908938884735107 }, { "id": "identity_123236", "image": "PMC-VQA/images/PMC7724709_Fig1_316990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging is shown in the picture?" }, { "from": "gpt", "value": "The answer is X-ray" } ], "clip_score": 26.6875, "loss": 2.4060020446777344 }, { "id": "identity_41024", "image": "FigureQA/images/68126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between Blue Violet and Aqua at x-axis label 20?" }, { "from": "gpt", "value": "The answer is 0.2" } ], "clip_score": 25.109375, "loss": 2.243076801300049 }, { "id": "identity_17065", "image": "PMC-VQA/images/PMC7482117_f2_251611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used for brain imaging in this case? \nChoices:\n(A) Angiography \n(B) CT scan \n(C) PET scan \n(D) MRI scan " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.5625, "loss": 3.7510178089141846 }, { "id": "identity_2598", "image": "IconQA/images/77491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a fish. Which picture is tenth?\nChoices:\n(A) turtle\n(B) fish\n(C) sub" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.84375, "loss": 4.348581790924072 }, { "id": "identity_275415", "image": "AI2D/images/300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web, which organism would immediately benefit from an increase in the amount of green plants, resulting in an increase in available energy? Is it the Rabbit, Frog, Snake, or Lion?\nChoices:\n(A) Rabbit\n(B) Frog\n(C) Snake\n(D) Lion" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 38.5, "loss": 3.4440956115722656 }, { "id": "identity_306705", "image": "TQA/images/question_images/aquifers_6523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What layer is beneath the aquifer?\nChoices:\n(A) Unconfined Aquifer\n(B) Confined Aquifer\n(C) Water Table\n(D) Confining Bed" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.625, "loss": 3.6952693462371826 }, { "id": "identity_278941", "image": "TabMWP/images/22229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum number of ducks that Mia counted at any pond in the city?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 27.875, "loss": 2.8066201210021973 }, { "id": "identity_72971", "image": "TQA/images/question_images/parts_skeleton_7272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two bones are connected by the pelvic girdle?" }, { "from": "gpt", "value": "The answer is sacrum and femur" } ], "clip_score": 31.21875, "loss": 2.8245115280151367 }, { "id": "identity_274415", "image": "Geometry3K/images/1846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of b in the parallelogram provided. \nOptions:\n(A) 2\n(B) 8\n(C) 9\n(D) 10" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.953125, "loss": 3.2770466804504395 }, { "id": "identity_26978", "image": "PlotQA/images/129868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph contain any zero values ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.984375, "loss": 3.169156074523926 }, { "id": "identity_167547", "image": "IconQA/images/82935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of orange squares to the total number of squares?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 30.671875, "loss": 1.5288584232330322 }, { "id": "identity_10334", "image": "AI2D/images/3986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Conversion of food to usable energy in the cell is done by:\nChoices:\n(A) Glycogen granules\n(B) Mitochondrion\n(C) Ribosome\n(D) Golgi body" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.21875, "loss": 3.819061756134033 }, { "id": "identity_288156", "image": "DocVQA/images/phgg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which individual is serving as the chairman for the afternoon sessions?" }, { "from": "gpt", "value": "The answer is Doctor Robert Shank" } ], "clip_score": 23.21875, "loss": 3.695765733718872 }, { "id": "identity_76648", "image": "ScienceQA/images/18240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sandwich does Jerry have in his lunch?" }, { "from": "gpt", "value": "The answer is Peanut butter and jelly sandwich" } ], "clip_score": 21.015625, "loss": 1.1871528625488281 }, { "id": "identity_212984", "image": "TextVQA/images/03ee1c3f5cdc8110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color scheme of the aircraft's livery?" }, { "from": "gpt", "value": "The answer is White with blue and red accents" } ], "clip_score": 25.34375, "loss": 2.402631998062134 }, { "id": "identity_172540", "image": "TabMWP/images/5348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of coins in all the collections combined?" }, { "from": "gpt", "value": "The answer is 297" } ], "clip_score": 31.375, "loss": 2.262399911880493 }, { "id": "identity_31819", "image": "VizWiz/images/VizWiz_train_00010803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is it?" }, { "from": "gpt", "value": "The answer is unknown" } ], "clip_score": 15.140625, "loss": 3.799797773361206 }, { "id": "identity_319028", "image": "AI2D/images/3129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a thin hairlike outgrowth of an epidermal cell of a plant root that absorbs water and minerals from the soil?\nChoices:\n(A) phloem\n(B) root hair\n(C) epidermis\n(D) xylem" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.3125, "loss": 3.6643505096435547 }, { "id": "identity_135365", "image": "DVQA/images/bar_train_00197282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value indicated by a green circle for the category \"bridge\"?" }, { "from": "gpt", "value": "The answer is 10^7" } ], "clip_score": 20.390625, "loss": 2.5221126079559326 }, { "id": "identity_305198", "image": "ScienceQA/images/15649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy moved from () to ().\nChoices:\n(A) the surroundings . . . each greenhouse\n(B) each greenhouse . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.34375, "loss": 3.922257661819458 }, { "id": "identity_14458", "image": "DVQA/images/bar_train_00144288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the smallest summed value?" }, { "from": "gpt", "value": "The answer is driver" } ], "clip_score": 24.546875, "loss": 4.1319122314453125 }, { "id": "identity_64071", "image": "AI2D/images/4211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of rock is formed directly from magma according to the diagram?" }, { "from": "gpt", "value": "The answer is Igneous rock" } ], "clip_score": 33.71875, "loss": 1.903276801109314 }, { "id": "identity_4392", "image": "UniGeo/images/3131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,把一块直角三角板的直角顶点放在直尺的一边上,若∠2=42°,则∠1=()\nChoices:\n(A) 48°\n(B) 42°\n(C) 40°\n(D) 45°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.21875, "loss": 2.8417000770568848 }, { "id": "identity_182634", "image": "VQA-AS/images/14263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the man and the dog likely engaged in?" }, { "from": "gpt", "value": "The answer is Playing fetch" } ], "clip_score": 30.578125, "loss": 3.336052417755127 }, { "id": "identity_29282", "image": "IconQA/images/40637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are on the frame?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.09375, "loss": 2.412465810775757 }, { "id": "identity_259908", "image": "FigureQA/images/86292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the y-axis values between Dark Orange and Violet Red at x = 50?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.75, "loss": 2.799281358718872 }, { "id": "identity_77308", "image": "TQA/images/abc_question_images/sun_layers_14062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label R indicate in the image?" }, { "from": "gpt", "value": "The answer is Sunspots" } ], "clip_score": 32.3125, "loss": 3.267317533493042 }, { "id": "identity_117604", "image": "VQA2.0/images/16483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are visible in the sky?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 24.171875, "loss": 2.3007864952087402 }, { "id": "identity_221043", "image": "PMC-VQA/images/PMC6878058_ccr32451-fig-0003_74099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate diameter of the circle indicating the location of the deep peroneal nerve?" }, { "from": "gpt", "value": "The answer is 2 mm" } ], "clip_score": 29.953125, "loss": 2.8498497009277344 }, { "id": "identity_49345", "image": "FigureQA/images/44074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any segments that are approximately equal in size?" }, { "from": "gpt", "value": "The answer is No, all segments are different in size" } ], "clip_score": 23.765625, "loss": 2.363283395767212 }, { "id": "identity_265898", "image": "VizWiz/images/VizWiz_train_00006443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the color of this tapestry?" }, { "from": "gpt", "value": "The answer is green red black yellow" } ], "clip_score": 30.40625, "loss": 3.2643213272094727 }, { "id": "identity_188227", "image": "PMC-VQA/images/PMC9402411_rcr21020-fig-0002_379370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this image show any signs of fibrosis in the lung tissue?" }, { "from": "gpt", "value": "The answer is No signs of fibrosis evident" } ], "clip_score": 34.15625, "loss": 3.352749824523926 }, { "id": "identity_149441", "image": "DocVQA/images/kzlg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percent authorized for the employee with the account number ending in 5227?" }, { "from": "gpt", "value": "The answer is 100.00%" } ], "clip_score": 20.0625, "loss": 1.301914095878601 }, { "id": "identity_22421", "image": "AI2D/images/4402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of leaves are Kidney shaped?\nChoices:\n(A) reniform\n(B) ovate\n(C) deltoid\n(D) linear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.609375, "loss": 3.802952527999878 }, { "id": "identity_221564", "image": "PMC-VQA/images/PMC9408430_genes-13-01431-f003_382438.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to capture this image of Case 1?" }, { "from": "gpt", "value": "The answer is Ultrasound Biomicroscopy (UBM)" } ], "clip_score": 34.25, "loss": 1.6274254322052002 }, { "id": "identity_197626", "image": "VQA2.0/images/229494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is on the person's shirt?" }, { "from": "gpt", "value": "The answer is Tie-dye" } ], "clip_score": 19.640625, "loss": 2.1033737659454346 }, { "id": "identity_10129", "image": "FigureQA/images/87812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Gold have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.625, "loss": 3.876060962677002 }, { "id": "identity_294633", "image": "GeoQA+/images/3720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, BD is the median of triangle ABC, with points E and F as the midpoints of BD and CE, respectively. If the area of triangle ABC is 12, what is the area of triangle AEF?\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.84375, "loss": 2.3200230598449707 }, { "id": "identity_96678", "image": "TabMWP/images/12130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Tom wants to save 20% of his net income, how much will he save?" }, { "from": "gpt", "value": "The answer is $1,883.20" } ], "clip_score": 27.796875, "loss": 1.8714523315429688 }, { "id": "identity_276225", "image": "ScienceQA/images/9491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.828125, "loss": 3.7590880393981934 }, { "id": "identity_231340", "image": "FigureQA/images/40145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Blue Violet and Steel Blue?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 37.5625, "loss": 1.7278416156768799 }, { "id": "identity_232035", "image": "ScienceQA/images/2667.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the left magnet is rotated 180 degrees around its vertical axis, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is attract" } ], "clip_score": 21.90625, "loss": 3.101118564605713 }, { "id": "identity_204289", "image": "TextVQA/images/3be4dac11cd47d76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the denomination of the currency being handed over?" }, { "from": "gpt", "value": "The answer is Twenty-dollar bills" } ], "clip_score": 13.75, "loss": 2.5605533123016357 }, { "id": "identity_323213", "image": "DVQA/images/bar_train_00141531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the mediumpurple color represent?" }, { "from": "gpt", "value": "The answer is rush" } ], "clip_score": 19.96875, "loss": 3.2971858978271484 }, { "id": "identity_211407", "image": "PMC-VQA/images/PMC4109464_F2_308442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any identifiable sensory structures in the image?" }, { "from": "gpt", "value": "The answer is Antennae" } ], "clip_score": 26.90625, "loss": 3.0344700813293457 }, { "id": "identity_14390", "image": "IconQA/images/99564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many skunks are there?\nChoices:\n(A) 8\n(B) 2\n(C) 1\n(D) 7\n(E) 10" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.0, "loss": 2.8371899127960205 }, { "id": "identity_20147", "image": "IconQA/images/79857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of flowers even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.765625, "loss": 4.1631879806518555 }, { "id": "identity_47941", "image": "DVQA/images/bar_train_00020157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is the least preferred among the given options?" }, { "from": "gpt", "value": "The answer is refund" } ], "clip_score": 29.328125, "loss": 3.4676294326782227 }, { "id": "identity_171328", "image": "TabMWP/images/17237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students collected more than 6 cans of food?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.90625, "loss": 2.4690966606140137 }, { "id": "identity_57437", "image": "PlotQA/images/51121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the trade values for Georgia and Kyrgyz Republic the same?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.28125, "loss": 3.0904059410095215 }, { "id": "identity_238124", "image": "VQA2.0/images/370250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What safety measures are being followed in the man's attire and the environment around him?" }, { "from": "gpt", "value": "The answer is Protective suit, mask, gloves, ventilation, organized tools, protective sheet" } ], "clip_score": 25.078125, "loss": 3.1201610565185547 }, { "id": "identity_48347", "image": "FigureQA/images/68745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Bubblegum line ever go above the Dark Green line?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.9375, "loss": 3.408196210861206 }, { "id": "identity_105631", "image": "VQA2.0/images/179060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the activity taking place in the image?" }, { "from": "gpt", "value": "The answer is Playing frisbee" } ], "clip_score": 27.90625, "loss": 2.294442653656006 }, { "id": "identity_95067", "image": "IconQA/images/10103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of the spinner landing on a non-yellow color?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 28.015625, "loss": 2.081472396850586 }, { "id": "identity_249809", "image": "PMC-VQA/images/PMC7437098_FIG1_237433.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences in image characteristics between a CT chest scan and an MR chest scan, and how do these differences help in identifying the imaging technique used in case 2?" }, { "from": "gpt", "value": "The answer is CT chest scan" } ], "clip_score": 30.1875, "loss": 3.3588523864746094 }, { "id": "identity_100773", "image": "IconQA/images/18507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are triangles but not blue?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.671875, "loss": 2.6229946613311768 }, { "id": "identity_165071", "image": "Geometry3K/images/1669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the triangle?" }, { "from": "gpt", "value": "The answer is 15 + 15 + (15 * sqrt(2))" } ], "clip_score": 35.6875, "loss": 1.7584776878356934 }, { "id": "identity_284218", "image": "IconQA/images/87886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show if Ted is staying home one evening?\nChoices:\n(A) 9:00 A.M.\n(B) 9:00 P.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.703125, "loss": 3.7677345275878906 }, { "id": "identity_208038", "image": "PMC-VQA/images/PMC5831712_Fig2_282083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any markers placed on the image?" }, { "from": "gpt", "value": "The answer is Yes, there are markers" } ], "clip_score": 15.6484375, "loss": 2.7295303344726562 }, { "id": "identity_74085", "image": "AI2D/images/2275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are there in the life cycle of a turtle as per the diagram?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.9375, "loss": 2.1995604038238525 }, { "id": "identity_196061", "image": "A-OKVQA/images/278923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the base of the vase?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 25.984375, "loss": 4.358157157897949 }, { "id": "identity_32548", "image": "DocVQA/images/yjcd0227_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much calories are required for 1 kg mammals?" }, { "from": "gpt", "value": "The answer is 93" } ], "clip_score": 18.859375, "loss": 2.9133567810058594 }, { "id": "identity_69512", "image": "ScienceQA/images/13994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item in the image is naturally occurring and not man-made?" }, { "from": "gpt", "value": "The answer is Ocean water" } ], "clip_score": 18.6875, "loss": 2.519793748855591 }, { "id": "identity_322536", "image": "DocVQA/images/gfhb0018_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What day of the week is this date in April?" }, { "from": "gpt", "value": "The answer is Saturday" } ], "clip_score": 18.09375, "loss": 3.8007407188415527 }, { "id": "identity_47121", "image": "DVQA/images/bar_train_00157030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'letter' and 'list' datasets on the 'movie' algorithm?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.84375, "loss": 3.512904405593872 }, { "id": "identity_60967", "image": "DVQA/images/bar_train_00164702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does 'fury' have a higher value in 'deal' than in 'auto'?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.703125, "loss": 3.155938148498535 }, { "id": "identity_297287", "image": "PMC-VQA/images/PMC3374820_pone-0038732-g005_141511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which regions are indicated by white squares?\nChoices:\n(A) C and D\n(B) A and B\n(C) Only A\n(D) Only B" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6171875, "loss": 4.283016204833984 }, { "id": "identity_58198", "image": "DVQA/images/bar_train_00094274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any category where all four values are above 20?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.765625, "loss": 3.322232723236084 }, { "id": "identity_165285", "image": "GeoQA+/images/599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the angle at point A is 60 degrees, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 29.4375, "loss": 1.8187236785888672 }, { "id": "identity_153746", "image": "ScienceQA/images/12261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is Ohio" } ], "clip_score": 31.84375, "loss": 3.168578624725342 }, { "id": "identity_157467", "image": "AI2D/images/2850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the muscle labeled on the chest, just below the shoulder?" }, { "from": "gpt", "value": "The answer is pectoralis major" } ], "clip_score": 29.59375, "loss": 2.31384539604187 }, { "id": "identity_184860", "image": "PMC-VQA/images/PMC6418144_Fig1_450835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of a scale or measurement reference in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is a 1 mm scale reference" } ], "clip_score": 23.140625, "loss": 2.227963447570801 }, { "id": "identity_226652", "image": "VQA-AS/images/11211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the specific actions of the kids that make the grandma look sad, and how do these actions contribute to her sadness?" }, { "from": "gpt", "value": "The answer is Pulling her arm and laughing at her" } ], "clip_score": 26.453125, "loss": 3.341660737991333 }, { "id": "identity_251748", "image": "TextVQA/images/273df1a961b0e2a8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many total saved pages are there in \"My Web\" and \"My Community's Web\" combined?" }, { "from": "gpt", "value": "The answer is 46" } ], "clip_score": 24.390625, "loss": 2.2184386253356934 }, { "id": "identity_159160", "image": "GeoQA+/images/1491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠EAB before the fold?" }, { "from": "gpt", "value": "The answer is 0°" } ], "clip_score": 36.03125, "loss": 2.991621494293213 }, { "id": "identity_81859", "image": "UniGeo/images/2161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment BD in the given quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 35.1875, "loss": 2.676832914352417 }, { "id": "identity_237079", "image": "UniGeo/images/224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O的直径CD垂直于弦EF,垂足为G,若∠EOD=40°,求∠EOF的度数。" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 31.03125, "loss": 1.8375722169876099 }, { "id": "identity_136892", "image": "DVQA/images/bar_train_00172615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who preferred objects in the category \"step\"?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 26.78125, "loss": 2.706324338912964 }, { "id": "identity_193672", "image": "A-OKVQA/images/117098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the model of the motorcycle?" }, { "from": "gpt", "value": "The answer is CBR 1000RR" } ], "clip_score": 30.140625, "loss": 1.660239577293396 }, { "id": "identity_30867", "image": "Geometry3K/images/1451.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given right triangle X Y Z with hypotenuse X Y, Y P is equal to Y Z. If m \\angle P Y Z = 26, find m \\angle X Z P.\nChoices:\n(A) 13\n(B) 26\n(C) 32\n(D) 64" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.765625, "loss": 2.3711018562316895 }, { "id": "identity_213688", "image": "TextVQA/images/26cb1858072bdfc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is prominently featured in the image?" }, { "from": "gpt", "value": "The answer is A tram or streetcar" } ], "clip_score": 31.765625, "loss": 3.065309524536133 }, { "id": "identity_254799", "image": "PMC-VQA/images/PMC3599802_F2_192545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure labeled in blue and explain the significance of the planning target volume (PTV) in radiation therapy treatment planning" }, { "from": "gpt", "value": "The answer is The structure labeled in blue is the Planning Target Volume (PTV)" } ], "clip_score": 26.875, "loss": 1.7965120077133179 }, { "id": "identity_295303", "image": "A-OKVQA/images/22487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which police force do the men holding instruments in uniforms resembling their colors represent? \n(A) mounties\n(B) nypd\n(C) scotland yard\n(D) lapd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.390625, "loss": 3.90564227104187 }, { "id": "identity_28752", "image": "PMC-VQA/images/PMC7781174_Fig1_334095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the arrows in the images indicate? \nChoices:\n(A) The location of the hospitalised patients \n(B) The different types of COVID symptoms \n(C) The areas of the chest with abnormal thymus hyperplasia \n(D) The normal aspects of the thymus gland. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.4375, "loss": 3.9785125255584717 }, { "id": "identity_93415", "image": "IconQA/images/60154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of polygon is this shape?" }, { "from": "gpt", "value": "The answer is Pentagon" } ], "clip_score": 27.546875, "loss": 3.3551013469696045 }, { "id": "identity_196983", "image": "VQA2.0/images/74533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible on the train platform?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.984375, "loss": 2.962078809738159 }, { "id": "identity_284027", "image": "ScienceQA/images/8270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Edgar's experiment to answer:\n(A) Can pennies hold more drops of pure water or water mixed with hand soap?\n(B) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.421875, "loss": 4.161902904510498 }, { "id": "identity_168039", "image": "TabMWP/images/4335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average stock price for Houston Sporting Goods on Wednesday and Thursday?" }, { "from": "gpt", "value": "The answer is $11.50" } ], "clip_score": 32.96875, "loss": 1.6187289953231812 }, { "id": "identity_104261", "image": "VQA-AS/images/18492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tree is shown on the left side of the image?" }, { "from": "gpt", "value": "The answer is Oak" } ], "clip_score": 17.8125, "loss": 3.1892786026000977 }, { "id": "identity_96332", "image": "TabMWP/images/11425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags had an odd number of orange candies?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 32.40625, "loss": 2.775529623031616 }, { "id": "identity_284981", "image": "A-OKVQA/images/98636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what type of surface is the bird currently perched?\nChoices:\n(A) metal\n(B) glass\n(C) brick\n(D) concrete" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.5234375, "loss": 4.908816337585449 }, { "id": "identity_303430", "image": "TextVQA/images/8526d48941ae62d9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on this poster?" }, { "from": "gpt", "value": "The answer is october 12" } ], "clip_score": 21.28125, "loss": 2.9427785873413086 }, { "id": "identity_156541", "image": "ScienceQA/images/12913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the magnetic pole orientation, what will be the interaction between the two magnets?" }, { "from": "gpt", "value": "The answer is The magnets will attract each other" } ], "clip_score": 19.609375, "loss": 2.6607208251953125 }, { "id": "identity_179727", "image": "IconQA/images/33440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the others?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.59375, "loss": 2.421717882156372 }, { "id": "identity_80050", "image": "UniGeo/images/3183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment CD?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.15625, "loss": 2.618314027786255 }, { "id": "identity_337698", "image": "DocVQA/images/kmnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is this memo addressed?" }, { "from": "gpt", "value": "The answer is EPIDEMIOLOGY SUMMER SESSION FACULTY" } ], "clip_score": 21.40625, "loss": 1.5893784761428833 }, { "id": "identity_28903", "image": "PMC-VQA/images/PMC4533796_Fig6_413971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the specimen shown in the image? \nChoices:\n(A) mice tumors \n(B) human tumors \n(C) mice brain tissue \n(D) human brain tissue " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.53125, "loss": 4.830618858337402 }, { "id": "identity_176609", "image": "IconQA/images/103128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pink marbles must be removed so that no pink marbles remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.375, "loss": 2.0697450637817383 }, { "id": "identity_220214", "image": "TextVQA/images/060540f814b4ba97.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fire hydrant in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 18.75, "loss": 3.4680569171905518 }, { "id": "identity_177009", "image": "IconQA/images/7130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the shape?" }, { "from": "gpt", "value": "The answer is 7 units" } ], "clip_score": 26.984375, "loss": 2.570632219314575 }, { "id": "identity_286727", "image": "TabMWP/images/31148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The school district compared the number of swings at each elementary school. What is the average number of swings across all schools?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 42.25, "loss": 2.6324617862701416 }, { "id": "identity_66246", "image": "ScienceQA/images/17660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the map?" }, { "from": "gpt", "value": "The answer is Iowa" } ], "clip_score": 32.28125, "loss": 2.794426202774048 }, { "id": "identity_1319", "image": "Super-CLEVR/images/superCLEVR_new_001192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The large red thing that is the same material as the tiny cruiser is what shape?" }, { "from": "gpt", "value": "The answer is school bus" } ], "clip_score": 23.21875, "loss": 4.229517459869385 }, { "id": "identity_65334", "image": "ScienceQA/images/13788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 29.546875, "loss": 2.1637256145477295 }, { "id": "identity_246458", "image": "DocVQA/images/tgcw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of the meeting in minutes, and how many different topics are scheduled to be discussed?" }, { "from": "gpt", "value": "The answer is 90 minutes, 7 topics" } ], "clip_score": 21.484375, "loss": 2.5397984981536865 }, { "id": "identity_24633", "image": "PMC-VQA/images/PMC4403878_pone.0124652.g007_379357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk highlight? \nChoices:\n(A) ectopic induction of puc-lacZ \n(B) wild type expression in the closed arrows \n(C) wild type expression in the open arrows \n(D) wild type expression in the stalk region of the disc " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.484375, "loss": 4.666873931884766 }, { "id": "identity_130411", "image": "A-OKVQA/images/382104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the motorcycles equipped with a windshield?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.125, "loss": 3.4178149700164795 }, { "id": "identity_270663", "image": "DVQA/images/bar_train_00099289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the values in the chart follow a percentage scale?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.546875, "loss": 4.015727519989014 }, { "id": "identity_234396", "image": "IconQA/images/33547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the degree of rotation applied to the letter to achieve the transformation shown in the image?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 26.0, "loss": 1.789269208908081 }, { "id": "identity_285192", "image": "IconQA/images/12000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is it on the clock when Cindy is assisting her uncle in the garden this afternoon?\nChoices:\n(A) 1:00 P.M.\n(B) 1:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.890625, "loss": 3.547581911087036 }, { "id": "identity_303406", "image": "VQA2.0/images/261990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which is their left hand?" }, { "from": "gpt", "value": "The answer is one on letters" } ], "clip_score": 20.09375, "loss": 4.684977054595947 }, { "id": "identity_325008", "image": "DocVQA/images/qlhx0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where was the article \"Vertebrate Keratinization\" published?" }, { "from": "gpt", "value": "The answer is primate news" } ], "clip_score": 21.0625, "loss": 4.642529487609863 }, { "id": "identity_99984", "image": "TabMWP/images/8266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what price does the quantity demanded equal the quantity supplied?" }, { "from": "gpt", "value": "The answer is No price" } ], "clip_score": 32.625, "loss": 4.388401508331299 }, { "id": "identity_317059", "image": "FigureQA/images/92799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the Maroon line have the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.28125, "loss": 3.978972911834717 }, { "id": "identity_200462", "image": "VizWiz/images/VizWiz_train_00014842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the napkin holder?" }, { "from": "gpt", "value": "The answer is Pink with white polka dots" } ], "clip_score": 21.09375, "loss": 2.6385278701782227 }, { "id": "identity_214383", "image": "TextVQA/images/375594cf604ff63e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the stadium where this game is being played?" }, { "from": "gpt", "value": "The answer is Oakland Coliseum" } ], "clip_score": 30.859375, "loss": 1.5332200527191162 }, { "id": "identity_192016", "image": "PMC-VQA/images/PMC6521358_Fig10_481108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any labeling present that indicates the body part being imaged?" }, { "from": "gpt", "value": "The answer is Yes, peripheral nerve" } ], "clip_score": 26.9375, "loss": 3.0152082443237305 }, { "id": "identity_336356", "image": "VQA2.0/images/428440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the same on both boats?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 21.34375, "loss": 3.014404773712158 }, { "id": "identity_17734", "image": "FigureQA/images/84006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Seafoam less than Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.796875, "loss": 3.7239387035369873 }, { "id": "identity_115851", "image": "TextVQA/images/bee12950e4213f2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is used in the organization's name on the document?" }, { "from": "gpt", "value": "The answer is English and French" } ], "clip_score": 17.59375, "loss": 3.5614683628082275 }, { "id": "identity_31237", "image": "TabMWP/images/6286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for 3/4 of a kilogram of agave syrup?" }, { "from": "gpt", "value": "The answer is 0.75" } ], "clip_score": 31.296875, "loss": 2.041365623474121 }, { "id": "identity_287764", "image": "ChartQA/images/05411753006467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total when you add Ireland and Guadeloupe together?" }, { "from": "gpt", "value": "The answer is 25547" } ], "clip_score": 15.578125, "loss": 2.8722572326660156 }, { "id": "identity_168195", "image": "IconQA/images/32216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the rectangle was to be covered with square tiles each measuring 1 square foot, how many tiles would be needed?" }, { "from": "gpt", "value": "The answer is 15 tiles" } ], "clip_score": 32.375, "loss": 1.764270305633545 }, { "id": "identity_208452", "image": "VQA2.0/images/277379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many skis is the person using?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.609375, "loss": 3.852921724319458 }, { "id": "identity_67569", "image": "ScienceQA/images/9833.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are present in this food web?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.015625, "loss": 2.1707935333251953 }, { "id": "identity_175172", "image": "IconQA/images/68687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hours apart did Fred and Scott take out the trash?" }, { "from": "gpt", "value": "The answer is 1 hour" } ], "clip_score": 22.3125, "loss": 1.4949125051498413 }, { "id": "identity_231427", "image": "DVQA/images/bar_train_00067172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for all items across all stores?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 26.609375, "loss": 2.4571173191070557 }, { "id": "identity_200500", "image": "VQA-AS/images/17424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the picnic blanket?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 29.28125, "loss": 3.021472215652466 }, { "id": "identity_186143", "image": "TextVQA/images/02c331aed99d9664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of book is shown in the image?" }, { "from": "gpt", "value": "The answer is The Quran" } ], "clip_score": 33.03125, "loss": 3.0985665321350098 }, { "id": "identity_140992", "image": "MapQA/images/map_745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with values not available (N/A)?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.171875, "loss": 3.2148215770721436 }, { "id": "identity_15503", "image": "Super-CLEVR/images/superCLEVR_new_000729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green fighters are to the left of the bus on the left side of the brown double bus that is on the right side of the large green aeroplane?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 27.46875, "loss": 2.5123493671417236 }, { "id": "identity_116221", "image": "VQA-AS/images/3509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of furniture are visible in the room?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.953125, "loss": 2.032437324523926 }, { "id": "identity_107358", "image": "VQA-AS/images/6974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What animal is sleeping on the rug?" }, { "from": "gpt", "value": "The answer is The animal is a dog" } ], "clip_score": 24.265625, "loss": 3.198176383972168 }, { "id": "identity_268323", "image": "UniGeo/images/2562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, the basic shape forming the retractable door is a rhombus, with each side of the small rhombus measuring 0.4 meters. When the retractable door is fully extended to 8 meters wide, one internal angle of the rhombus is 60°. If the thickness of the middle partition is not considered, what is the value of n in the diagram?\nChoices:\n(A) 10\n(B) 15\n(C) 20\n(D) 25" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.546875, "loss": 2.545259952545166 }, { "id": "identity_22885", "image": "TextVQA/images/364744b9ff7ee8e1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the store?" }, { "from": "gpt", "value": "The answer is findus" } ], "clip_score": 28.796875, "loss": 3.5795907974243164 }, { "id": "identity_258248", "image": "IconQA/images/60447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each strawberry weighs 50 grams, what is the total weight of all the strawberries combined?" }, { "from": "gpt", "value": "The answer is 50 grams" } ], "clip_score": 27.359375, "loss": 1.6067571640014648 }, { "id": "identity_76935", "image": "TQA/images/question_images/human_system_respiratory_3598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the respiratory system is indicated by the label \"Bronchus\"?" }, { "from": "gpt", "value": "The answer is Bronchus" } ], "clip_score": 31.796875, "loss": 2.0980536937713623 }, { "id": "identity_70945", "image": "TQA/images/abc_question_images/human_system_circulatory_11378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label points to the left atrium?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.0, "loss": 2.8145463466644287 }, { "id": "identity_137557", "image": "DocVQA/images/ykpj0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main reason Shasta has been able to arrange fructose nationwide according to the document?" }, { "from": "gpt", "value": "The answer is Size and distribution ability" } ], "clip_score": 31.8125, "loss": 4.909043312072754 }, { "id": "identity_308230", "image": "PMC-VQA/images/PMC4030533_F3_290784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging methods were used to diagnose PCLD in the patients mentioned in the caption?\n\nChoices:\n(A) Chest X-ray and Abdominal CT scan\n(B) Axial CT-scan, Abdominal ultrasonography, and MRI\n(C) Abdominal CT-scan, Chest MRI, and Abdominal ultrasonography\n(D) Abdominal MRI, Renal ultrasonography, and Lung CT scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.65625, "loss": 3.658067226409912 }, { "id": "identity_39767", "image": "ChartQA/images/two_col_42769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many labels are there on the Y-axis?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 22.421875, "loss": 2.579364538192749 }, { "id": "identity_99882", "image": "IconQA/images/24933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents the side of a square, what would be the perimeter of the square?" }, { "from": "gpt", "value": "The answer is 24 centimeters" } ], "clip_score": 27.890625, "loss": 1.892778754234314 }, { "id": "identity_122470", "image": "TextVQA/images/4ef05378cb434836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the street sign for N Bassett St?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.40625, "loss": 3.5970511436462402 }, { "id": "identity_5268", "image": "TQA/images/question_images/seasons_1045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many seasons of the earth are shown?\nChoices:\n(A) 4\n(B) 3\n(C) 2\n(D) 5" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.625, "loss": 3.439195156097412 }, { "id": "identity_42862", "image": "FigureQA/images/37200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of Royal Blue in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 1/3 or 33.33%" } ], "clip_score": 37.71875, "loss": 1.4161872863769531 }, { "id": "identity_241186", "image": "VizWiz/images/VizWiz_train_00000697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What objects are located to the left and right of the napkin, and what is their relative position to each other?" }, { "from": "gpt", "value": "The answer is Plate, red box, napkin, left to right" } ], "clip_score": 24.578125, "loss": 3.140129566192627 }, { "id": "identity_182941", "image": "PMC-VQA/images/PMC8121193_FIG2_440377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the largest blue arrow?" }, { "from": "gpt", "value": "The answer is Left lung" } ], "clip_score": 26.515625, "loss": 3.8364064693450928 }, { "id": "identity_338335", "image": "IconQA/images/60231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 16.78125, "loss": 1.7632505893707275 }, { "id": "identity_218386", "image": "PMC-VQA/images/PMC6824786_F3_59798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is shown in the image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 27.109375, "loss": 2.9686520099639893 }, { "id": "identity_109416", "image": "PMC-VQA/images/PMC5746883_f3_258365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of pathology?" }, { "from": "gpt", "value": "The answer is Possible, but not definitive without context" } ], "clip_score": 29.046875, "loss": 4.023269176483154 }, { "id": "identity_325026", "image": "TQA/images/question_images/sun_layers_4062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the warmest part of this?\n\nChoices:\n(A) Core\n(B) Sunspots\n(C) Convection Zone\n(D) Photosphere" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.796875, "loss": 4.308047294616699 }, { "id": "identity_21655", "image": "ScienceQA/images/12017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these two objects have in common?\nChoices:\n(A) soft\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.25, "loss": 4.125154972076416 }, { "id": "identity_272924", "image": "IconQA/images/80690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more leaves than ladybugs?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.21875, "loss": 3.798578977584839 }, { "id": "identity_188791", "image": "PMC-VQA/images/PMC7601842_diagnostics-10-00748-f001_287965.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the structure indicated by the white arrow compared to the surrounding tissue?" }, { "from": "gpt", "value": "The answer is Lower radiodensity than surrounding tissue" } ], "clip_score": 28.984375, "loss": 1.8474249839782715 }, { "id": "identity_14638", "image": "A-OKVQA/images/489845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of room are the kids in?\nChoices:\n(A) bedroom\n(B) recreation\n(C) bathroom\n(D) kitchen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.265625, "loss": 4.301236152648926 }, { "id": "identity_70481", "image": "ScienceQA/images/16955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the island country to the east of the highlighted country?" }, { "from": "gpt", "value": "The answer is Haiti" } ], "clip_score": 33.25, "loss": 2.224740505218506 }, { "id": "identity_158997", "image": "UniGeo/images/2294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOB in the circle?" }, { "from": "gpt", "value": "The answer is 180 degrees" } ], "clip_score": 34.40625, "loss": 1.7826173305511475 }, { "id": "identity_102234", "image": "TabMWP/images/25888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest number of ducks at a pond?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 26.4375, "loss": 2.7539010047912598 }, { "id": "identity_190107", "image": "TextVQA/images/d7e1c905d63340f4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of the bus line displayed on the sign?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 23.609375, "loss": 1.9735380411148071 }, { "id": "identity_159061", "image": "GeoQA+/images/278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠3 is 50° and l1 is parallel to l2, what is the measure of ∠2?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 27.796875, "loss": 1.4618297815322876 }, { "id": "identity_118033", "image": "TextVQA/images/cc1f8773e7dce866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many points did player number 1 from the Wizards score?" }, { "from": "gpt", "value": "The answer is 10 points" } ], "clip_score": 29.359375, "loss": 1.7575057744979858 }, { "id": "identity_122963", "image": "TextVQA/images/9a6693f2ed5e340e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cap worn by the coach or player not in uniform?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.9375, "loss": 3.1327126026153564 }, { "id": "identity_119834", "image": "A-OKVQA/images/429759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella held by the person in the center of the image?" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 19.515625, "loss": 3.148524761199951 }, { "id": "identity_99588", "image": "TabMWP/images/17761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Britney has a budget of $20, how many pounds of bittersweet chocolate can she buy without exceeding her budget?" }, { "from": "gpt", "value": "The answer is 7 pounds" } ], "clip_score": 36.1875, "loss": 2.9558699131011963 }, { "id": "identity_258823", "image": "TextVQA/images/279d662c796eac37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the character Manduck influence the events in the comic, and what are the consequences of his actions?" }, { "from": "gpt", "value": "The answer is Manduck's hypnosis causes chaos and confusion among the characters" } ], "clip_score": 25.5625, "loss": 2.8709335327148438 }, { "id": "identity_125458", "image": "VQA2.0/images/133273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cabinet reflected in the mirror?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 28.734375, "loss": 4.014065742492676 }, { "id": "identity_84731", "image": "Geometry3K/images/1865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the diameter of \\odot B is 30 units, what is the radius of \\odot B?" }, { "from": "gpt", "value": "The answer is 15 units" } ], "clip_score": 29.734375, "loss": 1.607068657875061 }, { "id": "identity_114228", "image": "TextVQA/images/0e5fee6f27d73694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is predominantly used in the comic?" }, { "from": "gpt", "value": "The answer is German" } ], "clip_score": 22.46875, "loss": 3.066343069076538 }, { "id": "identity_130607", "image": "A-OKVQA/images/393630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the bench?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.59375, "loss": 3.870302200317383 }, { "id": "identity_243983", "image": "IconQA/images/91495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shape is not blue, and what is the sum of the numerator and the denominator of this fraction?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 25.984375, "loss": 3.7135753631591797 }, { "id": "identity_254013", "image": "IconQA/images/73063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Tina starts riding her bike at 7:00 P.M. and rides for 45 minutes, what time will it be when she finishes her ride?" }, { "from": "gpt", "value": "The answer is 7:45 P.M" } ], "clip_score": 25.125, "loss": 1.1364388465881348 }, { "id": "identity_74521", "image": "TQA/images/question_images/human_system_digestive_3709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, which organ is labeled as the initial part of the large intestine?" }, { "from": "gpt", "value": "The answer is Ascending colon" } ], "clip_score": 31.796875, "loss": 1.8653935194015503 }, { "id": "identity_311347", "image": "PMC-VQA/images/PMC5265783_F1_118893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was used to capture this? \nChoices:\n(A) CT Scan \n(B) Ultrasound \n(C) Magnetic Resonance Imaging \n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.640625, "loss": 3.997178316116333 }, { "id": "identity_326405", "image": "AI2D/images/4190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does b represent in this diagram?\nChoices:\n(A) melting\n(B) weathering\n(C) crystalisation\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.671875, "loss": 3.994668483734131 }, { "id": "identity_308356", "image": "TabMWP/images/13500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Bob recalled how many goals he had made in soccer practice each day. What is the median of these numbers?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.8125, "loss": 2.615579605102539 }, { "id": "identity_227837", "image": "TextVQA/images/3704b4d100aaa20c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes are open for cash payment, and how many lanes are closed?" }, { "from": "gpt", "value": "The answer is 1 open for cash, 2 closed" } ], "clip_score": 24.421875, "loss": 1.9168603420257568 }, { "id": "identity_73940", "image": "ScienceQA/images/10611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a human produced by this cross will have Thomsen disease?" }, { "from": "gpt", "value": "The answer is 4/4 or 100%" } ], "clip_score": 23.578125, "loss": 1.2676206827163696 }, { "id": "identity_327251", "image": "A-OKVQA/images/3528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely relation of the individual pushing the stroller to the child in it?\nChoices:\n(A) grandmother\n(B) mother\n(C) cousin\n(D) aunt" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.875, "loss": 4.8824143409729 }, { "id": "identity_238646", "image": "PMC-VQA/images/PMC7706278_Fig2_311596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the anatomical structures surrounding the area of free-air accumulation and describe their relative positions" }, { "from": "gpt", "value": "The answer is Spleen (left), Pancreas (anterior), Ascending colon (right), Aorta (posterior)" } ], "clip_score": 25.234375, "loss": 2.0716679096221924 }, { "id": "identity_277218", "image": "ScienceQA/images/3969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Oregon?\n(A) Salem\n(B) Helena\n(C) Knoxville\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.921875, "loss": 4.162690162658691 }, { "id": "identity_94666", "image": "IconQA/images/23980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red houses are there in the sequence?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 12.6171875, "loss": 2.2105252742767334 }, { "id": "identity_124224", "image": "TextVQA/images/8a11469dcd4fba7d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of glasses are placed directly under the tap handles?" }, { "from": "gpt", "value": "The answer is Beer glasses" } ], "clip_score": 29.96875, "loss": 2.384507656097412 }, { "id": "identity_39725", "image": "DVQA/images/bar_train_00024741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the \"crowd\" bar and the \"stuff\" bar?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 20.890625, "loss": 2.9430654048919678 }, { "id": "identity_185080", "image": "VizWiz/images/VizWiz_train_00009445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the surface on which the LPG cylinders are placed?" }, { "from": "gpt", "value": "The answer is Concrete" } ], "clip_score": 36.03125, "loss": 2.222256898880005 }, { "id": "identity_82135", "image": "GeoQA+/images/4279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that the perimeter of parallelogram ABCD is 16 and the length of diagonal BD is 5, what is the length of segment OD?" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 32.3125, "loss": 1.491976261138916 }, { "id": "identity_135018", "image": "FigureQA/images/99868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value of Lawn Green on the y-axis label?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 28.078125, "loss": 3.010063886642456 }, { "id": "identity_303987", "image": "Geometry3K/images/1780.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle G.\nChoices:\n(A) 102\n(B) 107\n(C) 112\n(D) 122" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.125, "loss": 2.5821564197540283 }, { "id": "identity_211920", "image": "A-OKVQA/images/202990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light for pedestrians?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.9375, "loss": 3.105541467666626 }, { "id": "identity_319317", "image": "A-OKVQA/images/150685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which group could divide all of these apples evenly among its members?\nChoices:\n(A) beatles\n(B) nirvana\n(C) cream\n(D) green day" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.15625, "loss": 3.741093397140503 }, { "id": "identity_200190", "image": "PMC-VQA/images/PMC9369276_jcm-11-04371-f004_369686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of joint replacement is visible in the image?" }, { "from": "gpt", "value": "The answer is Hip joint replacement" } ], "clip_score": 29.421875, "loss": 2.6426966190338135 }, { "id": "identity_193286", "image": "PMC-VQA/images/PMC8126771_fig01_443238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the highlighted structure in the right image relative to the scale?" }, { "from": "gpt", "value": "The answer is Diagonally from top left to bottom right" } ], "clip_score": 20.171875, "loss": 2.9896459579467773 }, { "id": "identity_76305", "image": "ScienceQA/images/18656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction would you travel from Nevada to reach Kansas?" }, { "from": "gpt", "value": "The answer is East" } ], "clip_score": 30.53125, "loss": 2.532414197921753 }, { "id": "identity_142888", "image": "FigureQA/images/20889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the largest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 34.28125, "loss": 3.2855324745178223 }, { "id": "identity_218017", "image": "VQA2.0/images/524788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the train in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 17.328125, "loss": 3.091667652130127 }, { "id": "identity_300469", "image": "GeoQA+/images/1117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,▱ABCD的一边AB为直径的⊙O过点C,若∠AOC=70°,则∠BAD等于多少?\n选项:\n(A) 145°\n(B) 140°\n(C) 135°\n(D) 130°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.671875, "loss": 2.1964752674102783 }, { "id": "identity_120862", "image": "VQA-RAD/images/synpic33689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of bowel obstruction?" }, { "from": "gpt", "value": "The answer is No signs of bowel obstruction" } ], "clip_score": 24.859375, "loss": 2.3753013610839844 }, { "id": "identity_310511", "image": "GeoQA+/images/5373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,菱形ABCD的面积为24cm²,对角线BD长6cm,点O为BD的中点,过点A作AE⊥BC交CB的延长线于点E,连接OE,则线段OE的长度是多少?\n\n选项:\n(A) 3cm\n(B) 4cm\n(C) 4.8cm\n(D) 5cm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.640625, "loss": 2.592984676361084 }, { "id": "identity_116116", "image": "VQA2.0/images/521867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many photo frames are there on the wall?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.390625, "loss": 2.32955265045166 }, { "id": "identity_10434", "image": "GEOS/images/033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram at the right, the angles are represented as shown. Find the measure of angle DBC.\nChoices:\n(A) 24 degrees\n(B) 125 degrees\n(C) 155 degrees\n(D) 158 degrees" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.59375, "loss": 2.44138765335083 }, { "id": "identity_237420", "image": "FigureQA/images/87630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the least variation in its values over the entire x-axis range?" }, { "from": "gpt", "value": "The answer is Light Green" } ], "clip_score": 27.078125, "loss": 3.83716082572937 }, { "id": "identity_153753", "image": "AI2D/images/784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is represented by the diagram?" }, { "from": "gpt", "value": "The answer is Nitrogen Cycle" } ], "clip_score": 33.40625, "loss": 1.6443535089492798 }, { "id": "identity_239990", "image": "DocVQA/images/spcg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the letter and who is the chief of the Bureau of Public Health Nutrition?" }, { "from": "gpt", "value": "The answer is June 30, 1961, Helen E. Walsh" } ], "clip_score": 29.4375, "loss": 2.3313488960266113 }, { "id": "identity_311212", "image": "UniGeo/images/2514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,圆O是△ACD的外接圆,AB是圆O的直径,∠BAD=60°,则∠C的度数是多少?\nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.984375, "loss": 2.5561351776123047 }, { "id": "identity_315478", "image": "A-OKVQA/images/356010.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of bears are these?\nChoices:\n(A) black\n(B) polar\n(C) grizzly\n(D) panda" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.21875, "loss": 4.179219722747803 }, { "id": "identity_271893", "image": "UniGeo/images/30.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, where angle C is 90°, point D lies on side BC, and DE is parallel to AB. If angle CDE is 150°, what is the measure of angle A? \n(A) 30°\n(B) 60°\n(C) 120°\n(D) 150°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.0, "loss": 2.651165008544922 }, { "id": "identity_189735", "image": "A-OKVQA/images/311208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.78125, "loss": 3.4951224327087402 }, { "id": "identity_229850", "image": "PMC-VQA/images/PMC4164806_fig4_320401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which image shows the most severe blood vessel changes in a rat model with hypertension, and describe the specific histological features that indicate these changes?" }, { "from": "gpt", "value": "The answer is Image B shows severe changes like thickened walls and fibrosis" } ], "clip_score": 25.046875, "loss": 3.2169573307037354 }, { "id": "identity_43745", "image": "FigureQA/images/24577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used to represent different sections in the pie chart?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.78125, "loss": 2.480025053024292 }, { "id": "identity_242367", "image": "A-OKVQA/images/48759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators in the image that suggest road construction is taking place, and how do they collectively support this conclusion?" }, { "from": "gpt", "value": "The answer is High-visibility clothing, traffic management, and potential barriers" } ], "clip_score": 23.9375, "loss": 3.890434741973877 }, { "id": "identity_96806", "image": "IconQA/images/51712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of buckets to crabs in the image?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 32.75, "loss": 1.6255741119384766 }, { "id": "identity_200498", "image": "PMC-VQA/images/PMC7837788_fig3_353592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any fibrotic changes be seen in the lung parenchyma?" }, { "from": "gpt", "value": "The answer is No visible fibrotic changes" } ], "clip_score": 32.21875, "loss": 2.9599220752716064 }, { "id": "identity_337479", "image": "A-OKVQA/images/215549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This elevated train is part of the public transportation system of which major US city?\nChoices:\n(A) chicago\n(B) philadelphia\n(C) new york\n(D) boston" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.203125, "loss": 3.5081324577331543 }, { "id": "identity_115063", "image": "A-OKVQA/images/165200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the tournament being played, as indicated by the signage in the background?" }, { "from": "gpt", "value": "The answer is Bank of the West" } ], "clip_score": 22.3125, "loss": 2.269831418991089 }, { "id": "identity_131894", "image": "TextVQA/images/cd4f7cee4adb50b6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the genre of the book as mentioned on the cover?" }, { "from": "gpt", "value": "The answer is Science Fiction Novel" } ], "clip_score": 29.734375, "loss": 2.484107255935669 }, { "id": "identity_259101", "image": "GeoQA+/images/5581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABCD为菱形,A,B两点的坐标分别是(√{3},0),(0,1),点C,D在坐标轴上,求菱形ABCD的面积。" }, { "from": "gpt", "value": "The answer is \\(2\\sqrt{3}\\)" } ], "clip_score": 29.09375, "loss": 2.246694326400757 }, { "id": "identity_321794", "image": "PMC-VQA/images/PMC5535241_nanomaterials-07-00175-f002_197225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images does this caption refer to?\nChoices:\n(A) 2D topography images \n(B) 4D topography images \n(C) 3D topography images \n(D) 1D topography images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.421875, "loss": 4.127145767211914 }, { "id": "identity_203600", "image": "VizWiz/images/VizWiz_train_00010333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface is the laptop placed on?" }, { "from": "gpt", "value": "The answer is Wooden desk" } ], "clip_score": 26.28125, "loss": 2.394649028778076 }, { "id": "identity_83191", "image": "Geometry3K/images/905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radius of circle R?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 35.03125, "loss": 2.468313455581665 }, { "id": "identity_265382", "image": "FigureQA/images/99531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the highest value among the options provided?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 3.8525054454803467 }, { "id": "identity_266264", "image": "DVQA/images/bar_train_00097868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many algorithms is the accuracy less than 4 in at least one dataset?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 32.625, "loss": 3.4390625953674316 }, { "id": "identity_127406", "image": "Super-CLEVR/images/superCLEVR_new_001552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 21.140625, "loss": 3.360072374343872 }, { "id": "identity_65518", "image": "AI2D/images/1207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the grasshopper is responsible for its ability to jump?" }, { "from": "gpt", "value": "The answer is Hind legs" } ], "clip_score": 34.21875, "loss": 2.5293996334075928 }, { "id": "identity_265970", "image": "TextVQA/images/b826fb536795abf4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time displayed on the clock with the two little figures?" }, { "from": "gpt", "value": "The answer is 5.35" } ], "clip_score": 18.890625, "loss": 2.243838310241699 }, { "id": "identity_184282", "image": "VQA2.0/images/429384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sofa?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.421875, "loss": 3.648258686065674 }, { "id": "identity_324290", "image": "DocVQA/images/rlmg0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many copies are distributed to the Defense Documentation Center?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 26.375, "loss": 2.159107208251953 }, { "id": "identity_125893", "image": "VQA-AS/images/15172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the sun?" }, { "from": "gpt", "value": "The answer is Circle" } ], "clip_score": 18.3125, "loss": 3.6379668712615967 }, { "id": "identity_126558", "image": "Super-CLEVR/images/superCLEVR_new_001507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more airplanes than motorbikes in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.859375, "loss": 3.198237895965576 }, { "id": "identity_33398", "image": "A-OKVQA/images/386272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people in line concerned about?\nChoices:\n(A) buying tickets\n(B) reclaiming baggage\n(C) buying food\n(D) making reservations" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.3125, "loss": 4.54074764251709 }, { "id": "identity_310005", "image": "TabMWP/images/11102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A pet store owner had her staff count the number of fish in each tank. How many tanks have exactly this number of fish?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 36.65625, "loss": 2.229668140411377 }, { "id": "identity_221346", "image": "A-OKVQA/images/296797.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the streets mentioned on the side of the vehicle?" }, { "from": "gpt", "value": "The answer is Powell and Hyde Streets" } ], "clip_score": 18.75, "loss": 1.9942187070846558 }, { "id": "identity_109484", "image": "A-OKVQA/images/58684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus in the foreground?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.15625, "loss": 3.281269073486328 }, { "id": "identity_275907", "image": "PlotQA/images/152689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bars?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.109375, "loss": 2.3717944622039795 }, { "id": "identity_210199", "image": "VQA-AS/images/8540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the picnic blanket?" }, { "from": "gpt", "value": "The answer is Checkered" } ], "clip_score": 30.328125, "loss": 2.5011651515960693 }, { "id": "identity_193736", "image": "A-OKVQA/images/480587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the majority of the helmets worn by the riders?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.734375, "loss": 3.6501877307891846 }, { "id": "identity_91233", "image": "Geometry3K/images/140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of \\angle 2" }, { "from": "gpt", "value": "The answer is 32°" } ], "clip_score": 35.03125, "loss": 1.7065343856811523 }, { "id": "identity_223371", "image": "PMC-VQA/images/PMC7820043_Fig2_347188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of calcification within the pelvic area?" }, { "from": "gpt", "value": "The answer is No signs of calcification" } ], "clip_score": 28.609375, "loss": 3.372401237487793 }, { "id": "identity_156328", "image": "ScienceQA/images/12271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state shares the longest border with California?" }, { "from": "gpt", "value": "The answer is Nevada" } ], "clip_score": 27.75, "loss": 2.040191888809204 }, { "id": "identity_256434", "image": "TabMWP/images/4341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Darnel buys 2.6 pounds of muesli and 1.5 pounds of granola, how much does he spend in total?" }, { "from": "gpt", "value": "The answer is $14.20" } ], "clip_score": 28.28125, "loss": 1.938372254371643 }, { "id": "identity_49332", "image": "FigureQA/images/50635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Aqua section in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 33%" } ], "clip_score": 31.84375, "loss": 2.181615114212036 }, { "id": "identity_91552", "image": "UniGeo/images/1924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point M is the midpoint of line segment AB and point N is on line segment MB, with AB=12 and the ratio of AM to BN is 3:1, what is the length of line segment BM?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 22.984375, "loss": 2.3971903324127197 }, { "id": "identity_310744", "image": "Super-CLEVR/images/superCLEVR_new_001474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the vehicle in front of the gray object the same size as the rubber object to the left of the brown double bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.109375, "loss": 5.016970634460449 }, { "id": "identity_7890", "image": "FigureQA/images/70387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Gold have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.546875, "loss": 3.917776107788086 }, { "id": "identity_283384", "image": "A-OKVQA/images/523357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the sidecar containing the small dog? \n(A) yellow \n(B) blue \n(C) white \n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.96875, "loss": 4.375109672546387 }, { "id": "identity_87880", "image": "UniGeo/images/687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the triangles ABC and DEF are similar with a similarity ratio of 1:2 and the length of DE is 4, what is the length of AC?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.015625, "loss": 2.084984064102173 }, { "id": "identity_14145", "image": "PMC-VQA/images/PMC4079070_F9_302790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in higher magnification in the (A) area of the image? \nChoices:\n(A) The cortical gyri in the occipital cortex \n(B) The cortical gyri in the prefrontal cortex \n(C) The cerebellum \n(D) The thalamus. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.625, "loss": 4.27596378326416 }, { "id": "identity_17313", "image": "TQA/images/question_images/human_system_reproductory_7005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a structure outside the vagina?\nChoices:\n(A) Labium Majora\n(B) Cervix\n(C) Ovary\n(D) Corpus of uterus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.734375, "loss": 3.908747434616089 }, { "id": "identity_280910", "image": "PlotQA/images/102357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any zero values present in the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 15.921875, "loss": 3.329420566558838 }, { "id": "identity_307169", "image": "DVQA/images/bar_train_00108380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the skyblue color represent?" }, { "from": "gpt", "value": "The answer is play" } ], "clip_score": 22.890625, "loss": 4.034306049346924 }, { "id": "identity_280274", "image": "DocVQA/images/kzbn0226_47.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the page number?" }, { "from": "gpt", "value": "The answer is 42" } ], "clip_score": 26.0, "loss": 2.9436194896698 }, { "id": "identity_39478", "image": "FigureQA/images/83308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Turquoise intersect with Dark Violet at any point on the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.140625, "loss": 2.7468152046203613 }, { "id": "identity_100601", "image": "IconQA/images/55842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 1.5 inches were trimmed from the end of the twig, how long would it remain?" }, { "from": "gpt", "value": "The answer is 3.75 inches" } ], "clip_score": 36.59375, "loss": 1.6646203994750977 }, { "id": "identity_38046", "image": "PMC-VQA/images/PMC5707219_Fig6_245867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the patient's medical history? \nChoices:\n(A) Diverticulitis \n(B) Appendicitis \n(C) Crohn's Disease \n(D) Pancreatitis " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.648989200592041 }, { "id": "identity_25735", "image": "DVQA/images/bar_train_00063151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of grave in score?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 26.015625, "loss": 2.265580654144287 }, { "id": "identity_239259", "image": "IconQA/images/66297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of acorns to squirrels, and is this ratio greater than 1?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 28.03125, "loss": 2.98045015335083 }, { "id": "identity_9005", "image": "VQA2.0/images/184697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color uniform is the man bent over wearing?" }, { "from": "gpt", "value": "The answer is red and yellow" } ], "clip_score": 19.3125, "loss": 3.2995641231536865 }, { "id": "identity_8452", "image": "PMC-VQA/images/PMC8380352_Fig5_38840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the light blue circles represent in the image? \nChoices:\n(A) Blood cells \n(B) Iron-positive cells \n(C) Green-colored cells \n(D) Normal cells " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.09375, "loss": 4.5313720703125 }, { "id": "identity_174772", "image": "TabMWP/images/31378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Connor wants to maintain an average of 3 photographs per day over the next 2 days, how many photographs does he need to take?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.03125, "loss": 3.42077374458313 }, { "id": "identity_239737", "image": "GeoQA+/images/551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点E是线段CD的中点,F是线段CE的中点,若AB=16cm,则线段EF=cm.()" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.140625, "loss": 3.05047607421875 }, { "id": "identity_202120", "image": "PMC-VQA/images/PMC9105292_materials-15-03338-f004_281330.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the structures in the image uniform in size?" }, { "from": "gpt", "value": "The answer is No, they vary in size" } ], "clip_score": 23.0, "loss": 2.787490129470825 }, { "id": "identity_283594", "image": "VQA-AS/images/18596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the dog currently awake?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.984375, "loss": 3.7540335655212402 }, { "id": "identity_193990", "image": "PMC-VQA/images/PMC5804599_Fig3_273245.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any distinguishable letters or numbers on this image?" }, { "from": "gpt", "value": "The answer is Yes, the letter 'a'" } ], "clip_score": 23.828125, "loss": 2.768670082092285 }, { "id": "identity_144474", "image": "DocVQA/images/gqxw0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of the transmission?" }, { "from": "gpt", "value": "The answer is 01'13" } ], "clip_score": 20.953125, "loss": 3.08737850189209 }, { "id": "identity_244076", "image": "GeoQA+/images/3223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 已知正方形ABCD的边长为4,点P是边CD上一点,且CP=3,连接BP,Q为BP的中点,过点Q作QM⊥PB交AC于点M,连接MB,MP,求三角形MQB的面积。" }, { "from": "gpt", "value": "The answer is 0.875" } ], "clip_score": 26.15625, "loss": 2.223788261413574 }, { "id": "identity_68688", "image": "ScienceQA/images/377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the thin brown stalk in the moss life cycle?" }, { "from": "gpt", "value": "The answer is It elevates the spore capsule for dispersal" } ], "clip_score": 38.5625, "loss": 2.8827712535858154 }, { "id": "identity_30259", "image": "TQA/images/question_images/glaciers_6929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many zones can you see?\nChoices:\n(A) 1\n(B) 2\n(C) 9\n(D) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.8203125, "loss": 3.2876555919647217 }, { "id": "identity_84261", "image": "GeoQA+/images/5865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a person walks from the bottom to the top of the slope, how much vertical height is gained per meter of slope length?" }, { "from": "gpt", "value": "The answer is 0.6m per meter" } ], "clip_score": 30.1875, "loss": 2.5276870727539062 }, { "id": "identity_6841", "image": "Geometry3K/images/807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Chords J F and B C intersect at K. If B K = 8, K C = 12, and K F = 16, find J K.\nChoices:\n(A) 6\n(B) 12\n(C) 15\n(D) 18" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.15625, "loss": 2.3614399433135986 }, { "id": "identity_269189", "image": "AI2D/images/3976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of figure is depicted above?\nChoices:\n(A) aster flower head\n(B) part of bird\n(C) parts of human\n(D) part of animal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.65625, "loss": 4.28214168548584 }, { "id": "identity_177355", "image": "IconQA/images/8404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape appears only once in the sequence?" }, { "from": "gpt", "value": "The answer is Rocket" } ], "clip_score": 22.59375, "loss": 3.1963870525360107 }, { "id": "identity_313526", "image": "TQA/images/question_images/muscle_fiber_7080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the muscle contains actin and myosin?\nChoices:\n(A) Mitochondria\n(B) Nucleus\n(C) Sarcoplasm\n(D) Myofibrils" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.828125, "loss": 3.4083316326141357 }, { "id": "identity_3169", "image": "ScienceQA/images/14928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arkansas?\nChoices:\n(A) Little Rock\n(B) Fayetteville\n(C) Tulsa\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.015625, "loss": 4.154170989990234 }, { "id": "identity_47274", "image": "FigureQA/images/81817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value of Olive on the graph?" }, { "from": "gpt", "value": "The answer is Approximately 94" } ], "clip_score": 21.96875, "loss": 3.0634522438049316 }, { "id": "identity_203032", "image": "A-OKVQA/images/470028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the woman's top?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 18.65625, "loss": 4.164369583129883 }, { "id": "identity_1409", "image": "IconQA/images/96208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show two things Jon did Sunday afternoon. Which did Jon do earlier?\nChoices:\n(A) played soccer\n(B) ate a snack" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.046875, "loss": 3.602163314819336 }, { "id": "identity_218717", "image": "A-OKVQA/images/310240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is in the background?" }, { "from": "gpt", "value": "The answer is A red barn" } ], "clip_score": 20.140625, "loss": 3.4695630073547363 }, { "id": "identity_148755", "image": "DocVQA/images/kgcx0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many speakers are listed for Session VI?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 16.96875, "loss": 2.44614577293396 }, { "id": "identity_63733", "image": "TQA/images/question_images/human_system_digestive_6071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Through which part of the digestive system does food pass immediately after the stomach?" }, { "from": "gpt", "value": "The answer is Duodenum" } ], "clip_score": 33.84375, "loss": 1.983610987663269 }, { "id": "identity_251227", "image": "DocVQA/images/zybx0223_34.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contact email address for the Company Secretary, and what is the highest share price of VST Industries Limited in the given period?" }, { "from": "gpt", "value": "The answer is investors@vstind.com, 2400.00" } ], "clip_score": 18.890625, "loss": 2.166956663131714 }, { "id": "identity_135522", "image": "DocVQA/images/pkbh0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the result of the transmission?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 20.140625, "loss": 4.484783172607422 }, { "id": "identity_87706", "image": "UniGeo/images/3074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is parallel to CD and the measure of angle 1 is 50 degrees, what is the measure of angle 4, the vertically opposite angle to angle 1?" }, { "from": "gpt", "value": "The answer is 50 degrees" } ], "clip_score": 28.3125, "loss": 1.6664987802505493 }, { "id": "identity_278433", "image": "VQA2.0/images/419664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there toys for the baby?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.453125, "loss": 3.6153721809387207 }, { "id": "identity_183275", "image": "PMC-VQA/images/PMC3395576_F4_144858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structures labeled with 'Cg' in the image?" }, { "from": "gpt", "value": "The answer is Secretion of substances" } ], "clip_score": 23.71875, "loss": 3.71004581451416 }, { "id": "identity_268258", "image": "PlotQA/images/139358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the total of the gross intake ratio in Azerbaijan and El Salvador exceed the gross intake ratio in Hungary every year?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.40625, "loss": 2.7267205715179443 }, { "id": "identity_318804", "image": "ScienceQA/images/16869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary persuasive appeal used in this advertisement?\nChoices:\n(A) pathos (emotion)\n(B) logos (reason)\n(C) ethos (character)" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.1796875, "loss": 4.827360153198242 }, { "id": "identity_177408", "image": "IconQA/images/75427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a circle is inscribed within the triangle, what is the radius of the circle?" }, { "from": "gpt", "value": "The answer is (2/3) * sqrt(3) cm" } ], "clip_score": 28.875, "loss": 1.8323025703430176 }, { "id": "identity_134320", "image": "DocVQA/images/mhjw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the circulation quantity mentioned?" }, { "from": "gpt", "value": "The answer is 25,000" } ], "clip_score": 17.84375, "loss": 1.9557181596755981 }, { "id": "identity_113128", "image": "Super-CLEVR/images/superCLEVR_new_001889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of wheels visible in the image?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 19.828125, "loss": 1.9246338605880737 }, { "id": "identity_264978", "image": "ScienceQA/images/20816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What items can Lexi and Cindy exchange to fulfill their respective desires?\nChoices:\n(A) Lexi can trade her tomatoes for Cindy's broccoli.\n(B) Cindy can trade her almonds for Lexi's tomatoes.\n(C) Cindy can trade her broccoli for Lexi's oranges.\n(D) Lexi can trade her tomatoes for Cindy's carrots." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.59375, "loss": 3.0564608573913574 }, { "id": "identity_141760", "image": "DocVQA/images/mynm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much was the January Budget for Research and Development?" }, { "from": "gpt", "value": "The answer is 33.7 billion dollars" } ], "clip_score": 25.046875, "loss": 2.219484567642212 }, { "id": "identity_87086", "image": "Geometry3K/images/1134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABD?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 32.3125, "loss": 1.9934998750686646 }, { "id": "identity_159957", "image": "UniGeo/images/3437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the smaller protractor is rotated such that its zero degree line aligns with line BP, what would be the new measure of angle O2PO1?" }, { "from": "gpt", "value": "The answer is 130 degrees" } ], "clip_score": 31.296875, "loss": 2.0762522220611572 }, { "id": "identity_92943", "image": "TabMWP/images/32224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many months had a waiting time of 30 minutes or more?" }, { "from": "gpt", "value": "The answer is 2 months" } ], "clip_score": 31.15625, "loss": 2.579782247543335 }, { "id": "identity_20498", "image": "TabMWP/images/30484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a bottle of ketchup cost than a bottle of hot sauce?" }, { "from": "gpt", "value": "The answer is 0.28" } ], "clip_score": 31.640625, "loss": 1.8976274728775024 }, { "id": "identity_246471", "image": "GeoQA+/images/5696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在矩形ABCD中,AB=2,∠AOB=60°,求矩形ABCD的面积。" }, { "from": "gpt", "value": "The answer is 4√3 / 3" } ], "clip_score": 27.21875, "loss": 2.3229029178619385 }, { "id": "identity_14302", "image": "VQA2.0/images/5115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the TV on?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.3125, "loss": 3.113450288772583 }, { "id": "identity_60625", "image": "MapQA/images/map_10681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states without available data on bird percentages?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.046875, "loss": 2.7439255714416504 }, { "id": "identity_195284", "image": "A-OKVQA/images/333684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the person's shorts?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 17.125, "loss": 3.6998140811920166 }, { "id": "identity_34287", "image": "CLEVR-Math/images/CLEVR_train_057125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 1 small red spheres. How many objects exist?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.9375, "loss": 1.9694290161132812 }, { "id": "identity_83438", "image": "Geometry3K/images/298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relationship between angle M in polygon LMNO and angle I in polygon GHIJ?" }, { "from": "gpt", "value": "The answer is Angle M is congruent to angle I, which is (y + 30) degrees" } ], "clip_score": 37.3125, "loss": 2.2268524169921875 }, { "id": "identity_13480", "image": "VizWiz/images/VizWiz_train_00014710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.453125, "loss": 3.769083023071289 }, { "id": "identity_35056", "image": "TabMWP/images/6218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Bonnie has $0.07. Does she have enough to buy a piece of polka-dotted paper and a piece of floral stationery?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.265625, "loss": 3.453235626220703 }, { "id": "identity_243839", "image": "TabMWP/images/2538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean number of newspapers collected by the students, and how does it compare to the median?" }, { "from": "gpt", "value": "The answer is The mean is 94.8, which is less than the median" } ], "clip_score": 30.6875, "loss": 2.6559386253356934 }, { "id": "identity_44095", "image": "FigureQA/images/6140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph have any bars with equal values?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.4375, "loss": 3.443577527999878 }, { "id": "identity_170602", "image": "TabMWP/images/22279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total number of art contest entries from 2006 to 2010?" }, { "from": "gpt", "value": "The answer is 113" } ], "clip_score": 40.78125, "loss": 2.0427210330963135 }, { "id": "identity_180362", "image": "TabMWP/images/6288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy 3 pounds of Manila scrap and 2 pounds of vellum scrap, what is the total cost?" }, { "from": "gpt", "value": "The answer is $8" } ], "clip_score": 33.125, "loss": 2.9351303577423096 }, { "id": "identity_99883", "image": "IconQA/images/24933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents the radius of a circle, what would be the diameter of the circle?" }, { "from": "gpt", "value": "The answer is 12 centimeters" } ], "clip_score": 25.921875, "loss": 1.8170026540756226 }, { "id": "identity_331783", "image": "A-OKVQA/images/228910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What hobby might this individual have?\nChoices:\n(A) competitive eating\n(B) photography\n(C) weight lifting\n(D) gaming" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3203125, "loss": 4.083144187927246 }, { "id": "identity_91604", "image": "Geometry3K/images/736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of one of the six equilateral triangles formed by the pink lines and the radius of the circle?" }, { "from": "gpt", "value": "The answer is 25sqrt(3)" } ], "clip_score": 29.53125, "loss": 2.140115737915039 }, { "id": "identity_200361", "image": "PMC-VQA/images/PMC8593536_F1_113266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to capture this image?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT) scan" } ], "clip_score": 34.4375, "loss": 1.7709702253341675 }, { "id": "identity_142387", "image": "FigureQA/images/77180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points does Medium Mint have above 80 on the y-axis?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.203125, "loss": 2.54315447807312 }, { "id": "identity_136312", "image": "MapQA/images/map_6010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range of mates in Colorado?" }, { "from": "gpt", "value": "The answer is 46.4%-58.9%" } ], "clip_score": 31.484375, "loss": 1.8971349000930786 }, { "id": "identity_111447", "image": "TextVQA/images/05bd3c543fbfe4af.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is predominantly used in the text of this image?" }, { "from": "gpt", "value": "The answer is Portuguese" } ], "clip_score": 17.03125, "loss": 3.418330669403076 }, { "id": "identity_152949", "image": "ScienceQA/images/2161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many points does this antler have?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.359375, "loss": 2.2602007389068604 }, { "id": "identity_308279", "image": "VQA-RAD/images/synpic20260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small mass near this kidney?" }, { "from": "gpt", "value": "The answer is Splenule" } ], "clip_score": 26.3125, "loss": 4.255627632141113 }, { "id": "identity_186061", "image": "TextVQA/images/00d3bc72520af34d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the footwear of the player to the left in the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.3125, "loss": 3.8371543884277344 }, { "id": "identity_283875", "image": "PMC-VQA/images/PMC7892591_F9_370641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which view is used to acquire the 3D data set? \nChoices:\n(A) Mid-gastric 90° RV inflow-outflow tract view \n(B) Mid-esophageal 60° RV inflow-outflow tract view \n(C) Apical four-chamber view \n(D) Subcostal four-chamber view" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.21875, "loss": 3.9832966327667236 }, { "id": "identity_99936", "image": "IconQA/images/33597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove two dots from each row, how many dots will remain?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.453125, "loss": 2.2450733184814453 }, { "id": "identity_158016", "image": "ScienceQA/images/19927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnetic polarity at the right end of the second magnet?" }, { "from": "gpt", "value": "The answer is South pole" } ], "clip_score": 21.171875, "loss": 3.9075145721435547 }, { "id": "identity_78053", "image": "AI2D/images/4169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the initial stage that leads to the formation of metamorphic rocks in the cycle?" }, { "from": "gpt", "value": "The answer is Melting" } ], "clip_score": 25.578125, "loss": 2.8678274154663086 }, { "id": "identity_119287", "image": "VQA-AS/images/5033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are there in the room?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 21.5, "loss": 2.5924322605133057 }, { "id": "identity_100472", "image": "TabMWP/images/10597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 2 soccer fields are removed, how many sprinklers will be left?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 29.671875, "loss": 1.884905457496643 }, { "id": "identity_28098", "image": "DocVQA/images/rlvm0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the ESD Project Leader as per the document?" }, { "from": "gpt", "value": "The answer is Philip Patterson" } ], "clip_score": 22.6875, "loss": 3.494943618774414 }, { "id": "identity_75513", "image": "TQA/images/question_images/muscle_fiber_7086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mitochondria are visible in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.484375, "loss": 2.417104482650757 }, { "id": "identity_166710", "image": "IconQA/images/6998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of squares to the total number of shapes?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 28.109375, "loss": 1.5821256637573242 }, { "id": "identity_7823", "image": "GeoQA+/images/3586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,∠C=90°,AC=5,BC=12,则AB=()\nChoices:\n(A) 12\n(B) 13\n(C) 14\n(D) 15" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.15625, "loss": 2.15175199508667 }, { "id": "identity_225076", "image": "PMC-VQA/images/PMC8472088_diagnostics-11-01726-f003_70102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the white arrow, and what is its clinical significance in relation to esophageal varices?" }, { "from": "gpt", "value": "The answer is Risk of bleeding due to esophageal varices" } ], "clip_score": 26.84375, "loss": 2.3914854526519775 }, { "id": "identity_156323", "image": "AI2D/images/1360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the layer of vegetation that appears on the right side of the image?" }, { "from": "gpt", "value": "The answer is Vegetation" } ], "clip_score": 25.0625, "loss": 3.218324899673462 }, { "id": "identity_44566", "image": "FigureQA/images/45275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the largest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Crimson" } ], "clip_score": 36.03125, "loss": 2.528074026107788 }, { "id": "identity_309177", "image": "PMC-VQA/images/PMC7057551_Fig3_129314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the staining used on this sample?\nChoices:\n(A) crystal violet-amaranth\n(B) Wright-Giemsa\n(C) hematoxylin-eosin\n(D) safranin-fast green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.265625, "loss": 3.9014198780059814 }, { "id": "identity_105041", "image": "VQA-AS/images/5975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the couch?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 28.015625, "loss": 3.1940770149230957 }, { "id": "identity_45673", "image": "DVQA/images/bar_train_00012169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the 'bed' category across all stacks?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 22.890625, "loss": 2.3500845432281494 }, { "id": "identity_264847", "image": "DocVQA/images/zpcb0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who holds the position of Staff Director on the Secretary's Commission on Pesticides?" }, { "from": "gpt", "value": "The answer is Dr. Albert C. Kolbye, Jr." } ], "clip_score": 27.28125, "loss": 1.2182444334030151 }, { "id": "identity_97910", "image": "CLEVR-Math/images/CLEVR_train_024505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 1 large object. How many large objects exist?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.40625, "loss": 2.1088249683380127 }, { "id": "identity_138984", "image": "DVQA/images/bar_train_00143920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the least preferred sub-category in the 'widow' category?" }, { "from": "gpt", "value": "The answer is movie" } ], "clip_score": 22.703125, "loss": 4.150694370269775 }, { "id": "identity_310515", "image": "DocVQA/images/mhkm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is cc'd in the document?" }, { "from": "gpt", "value": "The answer is Dr. William J. Darby" } ], "clip_score": 24.578125, "loss": 2.637606143951416 }, { "id": "identity_12032", "image": "ChartQA/images/multi_col_20338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of Hudson's Bay Company stores from 2017 to 2018?" }, { "from": "gpt", "value": "The answer is 89.5" } ], "clip_score": 24.203125, "loss": 2.332230567932129 }, { "id": "identity_196098", "image": "PMC-VQA/images/PMC8775891_ijms-23-00722-f006_170150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What region of the brain is labeled in the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is CA2 region of the hippocampus" } ], "clip_score": 24.90625, "loss": 2.1369831562042236 }, { "id": "identity_32847", "image": "A-OKVQA/images/408624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the small floats on the boats sides here meant to prevent the boats doing?\nChoices:\n(A) getting lost\n(B) soaring\n(C) bumping\n(D) sinking" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.8828125, "loss": 4.480626583099365 }, { "id": "identity_138185", "image": "ChartQA/images/OECD_ELDERLY_POPULATION_CAN_G20_ISR_SVN_000005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the elderly population percentage in Spain?" }, { "from": "gpt", "value": "The answer is 17.3" } ], "clip_score": 27.671875, "loss": 1.9474263191223145 }, { "id": "identity_183317", "image": "VQA2.0/images/222036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any individuals in the background engaging in a similar activity as the foreground individuals?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.96875, "loss": 3.0365333557128906 }, { "id": "identity_91345", "image": "GeoQA+/images/2806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 53.13" } ], "clip_score": 31.28125, "loss": 2.1006858348846436 }, { "id": "identity_84548", "image": "GeoQA+/images/1599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AC is 6 and CB is 3, what is the length of segment OB?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.078125, "loss": 2.469083070755005 }, { "id": "identity_243515", "image": "DocVQA/images/kqph0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are mentioned in the letter, and what is the total number of rooms required for their stay?" }, { "from": "gpt", "value": "The answer is 6 rooms" } ], "clip_score": 15.3359375, "loss": 4.246647834777832 }, { "id": "identity_171735", "image": "TabMWP/images/31210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the value 20 appear in the table?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.65625, "loss": 1.7872158288955688 }, { "id": "identity_218838", "image": "A-OKVQA/images/554875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the rider wearing for safety?" }, { "from": "gpt", "value": "The answer is Helmet and protective vest" } ], "clip_score": 22.546875, "loss": 2.53518009185791 }, { "id": "identity_221205", "image": "TextVQA/images/34df2fed3d2c0aa3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What spice is labeled on the purple tag in the background?" }, { "from": "gpt", "value": "The answer is Cinnamon" } ], "clip_score": 18.953125, "loss": 2.459383726119995 }, { "id": "identity_307960", "image": "AI2D/images/3247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What surrounds this structure?\nChoices:\n(A) cell membrane\n(B) vacuole\n(C) ribosomes\n(D) Nuclear Envelope" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.640625, "loss": 4.014252662658691 }, { "id": "identity_275969", "image": "Geometry3K/images/203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 2.\nOptions:\n(A) 26\n(B) 39\n(C) 55\n(D) 62" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.125, "loss": 3.1165788173675537 }, { "id": "identity_51299", "image": "PlotQA/images/126936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries' data is represented in the graph?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.5625, "loss": 2.766282796859741 }, { "id": "identity_216100", "image": "PMC-VQA/images/PMC8419424_F1_52227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lateral ventricles symmetrical in this image?" }, { "from": "gpt", "value": "The answer is Yes, they are symmetrical" } ], "clip_score": 33.0625, "loss": 2.4750239849090576 }, { "id": "identity_160219", "image": "GeoQA+/images/5921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知AB∥CD,BC平分∠ABE,若∠C=33°,则∠AEB的度数是()" }, { "from": "gpt", "value": "The answer is 33°" } ], "clip_score": 33.28125, "loss": 1.78764808177948 }, { "id": "identity_87583", "image": "UniGeo/images/97.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠1 is 42°, what is the measure of ∠BEG?" }, { "from": "gpt", "value": "The answer is 84°" } ], "clip_score": 32.5, "loss": 2.0670289993286133 }, { "id": "identity_8055", "image": "A-OKVQA/images/490871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of knife is pictured laying next to the apple?\nChoices:\n(A) bread\n(B) chef\n(C) boning\n(D) paring" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.265625, "loss": 3.4974048137664795 }, { "id": "identity_221768", "image": "PMC-VQA/images/PMC7841286_F1_354972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any blockages or occlusions in the retinal vessels?" }, { "from": "gpt", "value": "The answer is No blockages or occlusions identified" } ], "clip_score": 30.84375, "loss": 2.524845838546753 }, { "id": "identity_337346", "image": "TabMWP/images/29751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy a poetry book and a mystery novel?" }, { "from": "gpt", "value": "The answer is 47" } ], "clip_score": 32.71875, "loss": 2.5981481075286865 }, { "id": "identity_147726", "image": "DocVQA/images/lywd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the reviewer?" }, { "from": "gpt", "value": "The answer is Dr. Robert Shank" } ], "clip_score": 22.734375, "loss": 3.0684287548065186 }, { "id": "identity_281864", "image": "CLEVR-Math/images/CLEVR_train_016512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 3 matte blocks, what is the total number of matte blocks?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.40625, "loss": 2.104743719100952 }, { "id": "identity_112231", "image": "VQA2.0/images/526098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is the large stuffed toy hanging above the store entrance?" }, { "from": "gpt", "value": "The answer is Dog" } ], "clip_score": 23.640625, "loss": 4.146139621734619 }, { "id": "identity_302478", "image": "MapQA/images/map_7105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in this region?" }, { "from": "gpt", "value": "The answer is 4168776-5034519" } ], "clip_score": 22.265625, "loss": 2.7054972648620605 }, { "id": "identity_44502", "image": "FigureQA/images/20118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Medium Orchid in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 67%" } ], "clip_score": 36.03125, "loss": 2.412449359893799 }, { "id": "identity_110491", "image": "A-OKVQA/images/534155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the aircraft displayed indoors or outdoors?" }, { "from": "gpt", "value": "The answer is Indoors" } ], "clip_score": 24.0, "loss": 2.3205809593200684 }, { "id": "identity_196969", "image": "PMC-VQA/images/PMC8202118_f1_467793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is the red arrow pointing to?" }, { "from": "gpt", "value": "The answer is Pituitary gland" } ], "clip_score": 30.140625, "loss": 1.8881752490997314 }, { "id": "identity_2720", "image": "DVQA/images/bar_train_00007198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What store does the red color represent?" }, { "from": "gpt", "value": "The answer is beam" } ], "clip_score": 26.984375, "loss": 6.13008451461792 }, { "id": "identity_222336", "image": "A-OKVQA/images/432317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any signage visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.4140625, "loss": 2.9907867908477783 }, { "id": "identity_251561", "image": "DocVQA/images/jxpj0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Coca-Cola's trademark volume in North America was low- and no-calorie in 1999, and how much did it increase by 2012?" }, { "from": "gpt", "value": "The answer is 9%" } ], "clip_score": 20.15625, "loss": 3.023285150527954 }, { "id": "identity_259538", "image": "A-OKVQA/images/233112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the three items closest to the bottle of alcohol, and in what order are they positioned relative to the bottle?" }, { "from": "gpt", "value": "The answer is Toilet, Magic Eye book, and paper (in that order)" } ], "clip_score": 23.359375, "loss": 2.263084888458252 }, { "id": "identity_80446", "image": "GeoQA+/images/4893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the perimeter of the parallelogram ABCD is 50m and one side AB is 16m, what is the length of side BC?" }, { "from": "gpt", "value": "The answer is 9m" } ], "clip_score": 26.328125, "loss": 2.4137539863586426 }, { "id": "identity_331015", "image": "VQA-AS/images/2574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is happening with the dog?" }, { "from": "gpt", "value": "The answer is playing" } ], "clip_score": 23.546875, "loss": 3.7777557373046875 }, { "id": "identity_298294", "image": "A-OKVQA/images/469010.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the three white lines?\nChoices:\n(A) parking\n(B) no stopping\n(C) yield\n(D) crosswalk" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.484375, "loss": 4.663332939147949 }, { "id": "identity_78833", "image": "GEOS/images/026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle, if the measure of the arc intercepted by one secant is 185° and the angle formed by the other secant outside the circle is 80°, what is the measure of the arc intercepted by the second secant?" }, { "from": "gpt", "value": "The answer is 25°" } ], "clip_score": 31.296875, "loss": 2.198305368423462 }, { "id": "identity_127485", "image": "VQA-AS/images/15406.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the girl doing on the swing?" }, { "from": "gpt", "value": "The answer is The girl is swinging forward" } ], "clip_score": 29.25, "loss": 3.5280189514160156 }, { "id": "identity_288149", "image": "AI2D/images/2737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, how many stages are depicted?\nOptions:\n(A) 7\n(B) 6\n(C) none of the above\n(D) 8" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.09375, "loss": 3.2738983631134033 }, { "id": "identity_325850", "image": "Geometry3K/images/553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 10\n(B) 10 \\sqrt { 2 }\n(C) 10 \\sqrt { 3 }\n(D) 20" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.53125, "loss": 3.021854877471924 }, { "id": "identity_165578", "image": "GeoQA+/images/1275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given image, if ∠B = 28° and triangle ABC is folded along line m such that point B coincides with point D, what is the measure of ∠1?" }, { "from": "gpt", "value": "The answer is 152°" } ], "clip_score": 36.71875, "loss": 1.9276390075683594 }, { "id": "identity_135448", "image": "DocVQA/images/kgpj0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of consumers mentioned who read labels?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 20.5, "loss": 1.7276142835617065 }, { "id": "identity_251829", "image": "TabMWP/images/23177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of pets owned by the students?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 26.4375, "loss": 2.47808575630188 }, { "id": "identity_95212", "image": "CLEVR-Math/images/CLEVR_train_056103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cubes?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.71875, "loss": 2.631371259689331 }, { "id": "identity_118031", "image": "TextVQA/images/e9a38a77f3e7801f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the temperature and weather condition in Turin on the current Thursday?" }, { "from": "gpt", "value": "The answer is Mostly clear, 61 degrees" } ], "clip_score": 13.84375, "loss": 2.973576545715332 }, { "id": "identity_294347", "image": "Geometry3K/images/770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nOptions:\n(A) 3\n(B) 4\n(C) 6\n(D) 12" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.734375, "loss": 2.8235085010528564 }, { "id": "identity_197742", "image": "A-OKVQA/images/158713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image?" }, { "from": "gpt", "value": "The answer is A motorcycle with a sidecar" } ], "clip_score": 23.1875, "loss": 2.7467949390411377 }, { "id": "identity_303959", "image": "FigureQA/images/56209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.71875, "loss": 4.08229398727417 }, { "id": "identity_332077", "image": "TabMWP/images/10267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the people surveyed preferred chatting with coworkers during their break? Simplify your answer." }, { "from": "gpt", "value": "The answer is 32/209" } ], "clip_score": 28.046875, "loss": 2.4142374992370605 }, { "id": "identity_85111", "image": "UniGeo/images/2723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of parallelogram ABCD is 40, what is the height of the parallelogram when considering AD as the base?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.671875, "loss": 2.1415393352508545 }, { "id": "identity_310164", "image": "TabMWP/images/30325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tanks contain between 50 and 79 fish?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 28.484375, "loss": 3.1448583602905273 }, { "id": "identity_39470", "image": "DVQA/images/bar_train_00146454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the 'pot' category in the 'device' group?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.28125, "loss": 2.8741025924682617 }, { "id": "identity_144214", "image": "PlotQA/images/85731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region has shown a consistent increase in government consumption over the years?" }, { "from": "gpt", "value": "The answer is Middle East & North Africa (developing only)" } ], "clip_score": 30.125, "loss": 2.2816038131713867 }, { "id": "identity_319459", "image": "UniGeo/images/3149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 一个几何体的主视图和左视图都是边长为30cm的正三角形,俯视图是一个圆,那么这个几何体的侧面积是()\n\nChoices:\n(A) 250πcm²\n(B) 300πcm²\n(C) 450πcm²\n(D) 100√πcm²" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.046875, "loss": 2.956145763397217 }, { "id": "identity_149660", "image": "DocVQA/images/fzjf0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the PLA number mentioned at the bottom right of the document?" }, { "from": "gpt", "value": "The answer is PLA-TAK-00053932" } ], "clip_score": 17.671875, "loss": 2.331815004348755 }, { "id": "identity_110640", "image": "VizWiz/images/VizWiz_train_00007149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary material of the object on the left?" }, { "from": "gpt", "value": "The answer is Plastic or rubber" } ], "clip_score": 24.046875, "loss": 3.1399648189544678 }, { "id": "identity_191716", "image": "PMC-VQA/images/PMC6883550_Fig2_75821.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was likely used to capture this image?" }, { "from": "gpt", "value": "The answer is Fluorescence microscopy" } ], "clip_score": 26.125, "loss": 1.4528918266296387 }, { "id": "identity_137228", "image": "DVQA/images/bar_train_00145236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of 'desk' in the 'show' category?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 20.875, "loss": 2.7955405712127686 }, { "id": "identity_79322", "image": "Geometry3K/images/805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the expression 2x + 6 when x = 2?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 28.6875, "loss": 2.0234103202819824 }, { "id": "identity_154613", "image": "ScienceQA/images/10202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many Victoria amazonica leaves are visible in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 34.1875, "loss": 2.4062917232513428 }, { "id": "identity_45104", "image": "DVQA/images/bar_train_00112479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total accuracy of all algorithms combined?" }, { "from": "gpt", "value": "The answer is 39" } ], "clip_score": 34.0, "loss": 2.7345776557922363 }, { "id": "identity_314062", "image": "A-OKVQA/images/548164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time of day is shown in the image?\nChoices:\n(A) afternoon\n(B) evening\n(C) night\n(D) morning" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.4375, "loss": 4.196793556213379 }, { "id": "identity_207775", "image": "PMC-VQA/images/PMC8215858_FIG1_472110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter is associated with the inset image?" }, { "from": "gpt", "value": "The answer is f" } ], "clip_score": 23.296875, "loss": 3.570098876953125 }, { "id": "identity_282957", "image": "ScienceQA/images/20904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which trade option would allow Francesca and Desmond to exchange items to get what they want?\nChoices:\n(A) Francesca can trade her tomatoes for Desmond's sandwich.\n(B) Desmond can trade his broccoli for Francesca's oranges.\n(C) Francesca can trade her tomatoes for Desmond's broccoli.\n(D) Desmond can trade his almonds for Francesca's tomatoes." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.34375, "loss": 3.986781358718872 }, { "id": "identity_168863", "image": "IconQA/images/42300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one flower is removed, how many petals will be left?" }, { "from": "gpt", "value": "The answer is 45 petals" } ], "clip_score": 29.859375, "loss": 2.063951253890991 }, { "id": "identity_286218", "image": "PMC-VQA/images/PMC5924497_Fig2_307463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging techniques are utilized in this study?\nChoices:\n(A) X-ray and MRI\n(B) ECG and PET\n(C) ECG and ICG\n(D) ICG and CT" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.359375, "loss": 3.455997943878174 }, { "id": "identity_87417", "image": "UniGeo/images/3456.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is 5 and the distance from point A to the center O is 3, what is the length of the tangent from point A to the circle?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.03125, "loss": 2.289093017578125 }, { "id": "identity_163484", "image": "UniGeo/images/2464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,点D在AB的延长线上,DC切⊙O于点C,若∠A=25°,则∠BCO等于()" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 25.484375, "loss": 1.6411348581314087 }, { "id": "identity_263195", "image": "A-OKVQA/images/250808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fast food restaurant is located close to the lights?\nChoices:\n(A) McDonald's\n(B) Burger King\n(C) Popeye's\n(D) Wendy's" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.9453125, "loss": 3.9129347801208496 }, { "id": "identity_70996", "image": "ScienceQA/images/12955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the image?" }, { "from": "gpt", "value": "The answer is Nevada" } ], "clip_score": 32.6875, "loss": 2.4252755641937256 }, { "id": "identity_255287", "image": "PMC-VQA/images/PMC9415092_medicina-58-01070-f004_385930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of Acute Posterior Multifocal Placoid Pigment Epitheliopathy (APMPPE) as seen in the image, and how do they differ from the features of Age-related Macular Degeneration (AMD) and Diabetic Retinopathy (DR)?" }, { "from": "gpt", "value": "The answer is APMPPE features multiple placoid lesions, unlike AMD or DR" } ], "clip_score": 30.734375, "loss": 2.7124903202056885 }, { "id": "identity_142335", "image": "DocVQA/images/xkbw0217_43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number where Table 3 is found?" }, { "from": "gpt", "value": "The answer is 43" } ], "clip_score": 15.6171875, "loss": 2.3886921405792236 }, { "id": "identity_238012", "image": "ScienceQA/images/5192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the physical properties of the stuffed tiger and explain why it is classified as a solid" }, { "from": "gpt", "value": "The answer is The stuffed tiger is a solid because it has a definite shape and volume" } ], "clip_score": 32.3125, "loss": 2.236603260040283 }, { "id": "identity_308062", "image": "PMC-VQA/images/PMC5666125_f0020_233999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the sample type shown here?\nChoices:\n(A) wheat groat section \n(B) oat groat section \n(C) wheat flour \n(D) corn flour" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.796875, "loss": 4.87431001663208 }, { "id": "identity_220047", "image": "A-OKVQA/images/381630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation system is depicted in the image?" }, { "from": "gpt", "value": "The answer is Elevated train system" } ], "clip_score": 30.296875, "loss": 2.817072868347168 }, { "id": "identity_22841", "image": "TabMWP/images/10914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each soccer field has 2 sprinklers. How many sprinklers are on 3 soccer fields?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.21875, "loss": 1.8252472877502441 }, { "id": "identity_191083", "image": "PMC-VQA/images/PMC7137677_FIG3_145941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of joint is shown in the x-ray?" }, { "from": "gpt", "value": "The answer is Hinge joint" } ], "clip_score": 30.53125, "loss": 4.220633029937744 }, { "id": "identity_231298", "image": "DVQA/images/bar_train_00073716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item with the highest sales in each store?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 28.4375, "loss": 2.4527173042297363 }, { "id": "identity_5290", "image": "FigureQA/images/9237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Orchid the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.875, "loss": 3.707778215408325 }, { "id": "identity_186062", "image": "TextVQA/images/00d3bc72520af34d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible players are wearing a glove on their left hand?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.71875, "loss": 4.738255500793457 }, { "id": "identity_50017", "image": "DVQA/images/bar_train_00043055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stars are there on the 'water' category bar in 'mind'?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.59375, "loss": 2.465728998184204 }, { "id": "identity_187247", "image": "PMC-VQA/images/PMC4149355_pone-0099655-g001_316615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What magnification level might this image be taken at?" }, { "from": "gpt", "value": "The answer is Between 1000x and 2000x magnification" } ], "clip_score": 28.609375, "loss": 1.5310561656951904 }, { "id": "identity_306248", "image": "AI2D/images/1078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost layer of a plant cell?\nChoices:\n(A) Amylosplast\n(B) Cell Membrane\n(C) Cell wall\n(D) Cytoplasm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.953125, "loss": 4.031688213348389 }, { "id": "identity_323381", "image": "A-OKVQA/images/536725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of pants is this person wearing?\nChoices:\n(A) corduroy\n(B) cargo pants\n(C) jeans\n(D) dress pants" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.390625, "loss": 4.286062240600586 }, { "id": "identity_12027", "image": "VQA-AS/images/2439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the sky?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 20.28125, "loss": 3.8098037242889404 }, { "id": "identity_232058", "image": "UniGeo/images/3449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC内接于⊙O,若∠OBC=20°,且∠BAC=2∠BCA,则∠A的度数为多少?" }, { "from": "gpt", "value": "The answer is approximately 106.67°" } ], "clip_score": 32.4375, "loss": 2.166574001312256 }, { "id": "identity_76800", "image": "AI2D/images/2046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organisms are primary consumers in this ecosystem?" }, { "from": "gpt", "value": "The answer is Snail, weevil, grasshopper, beetle" } ], "clip_score": 27.0, "loss": 1.8934797048568726 }, { "id": "identity_48625", "image": "DVQA/images/bar_train_00160892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference for 'twelve' between the 'plug' and 'side' categories?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.609375, "loss": 2.97516131401062 }, { "id": "identity_48014", "image": "FigureQA/images/76410.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of the Medium Seafoam line consistently decreasing?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.484375, "loss": 3.400836944580078 }, { "id": "identity_182626", "image": "VQA-AS/images/14520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bicycle?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 26.296875, "loss": 2.9407095909118652 }, { "id": "identity_259483", "image": "DocVQA/images/gfdl0079_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for both the General Market and Hispanic Market combined, and how much more is the total cost compared to the General Market cost alone?" }, { "from": "gpt", "value": "The answer is $24,950 total, $12,500 more than General Market" } ], "clip_score": 21.484375, "loss": 2.334376335144043 }, { "id": "identity_241889", "image": "VQA2.0/images/278335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of sports equipment can you identify in this sports shop?" }, { "from": "gpt", "value": "The answer is Three types" } ], "clip_score": 17.796875, "loss": 4.730043411254883 }, { "id": "identity_148637", "image": "DocVQA/images/phvd0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first issue mentioned under 'Chronic Disease'?" }, { "from": "gpt", "value": "The answer is Absence of health district involvement in multiphasic screening and well adult clinics" } ], "clip_score": 19.0625, "loss": 2.537337303161621 }, { "id": "identity_15122", "image": "TabMWP/images/15946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for 4 kilograms of white rice?" }, { "from": "gpt", "value": "The answer is 5.32" } ], "clip_score": 33.0, "loss": 2.4391934871673584 }, { "id": "identity_55543", "image": "DVQA/images/bar_train_00106175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest value?" }, { "from": "gpt", "value": "The answer is decade" } ], "clip_score": 21.84375, "loss": 3.0878686904907227 }, { "id": "identity_204778", "image": "PMC-VQA/images/PMC6195911_f1_384201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a lesion or abnormality in the area indicated by the arrow?" }, { "from": "gpt", "value": "The answer is Yes, there is a hypoechoic area indicating a possible lesion" } ], "clip_score": 30.328125, "loss": 2.1854941844940186 }, { "id": "identity_319205", "image": "ScienceQA/images/7862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Tennessee\n(B) New Hampshire\n(C) Michigan\n(D) Maine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.53125, "loss": 4.522190093994141 }, { "id": "identity_315731", "image": "ScienceQA/images/5361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What evidence of an earthquake does this image show?\nChoices:\n(A) There are few clouds in the sky.\n(B) Part of a building broke apart and fell down." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.46875, "loss": 3.8799922466278076 }, { "id": "identity_141762", "image": "DocVQA/images/mynm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much did the Total Budget increase from January to the Revised March Base?" }, { "from": "gpt", "value": "The answer is 12.9 billion dollars" } ], "clip_score": 24.390625, "loss": 1.9702976942062378 }, { "id": "identity_182284", "image": "PMC-VQA/images/PMC2737537_F2_44773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the ribs be clearly identified in the image?" }, { "from": "gpt", "value": "The answer is Yes, ribs are visible" } ], "clip_score": 28.21875, "loss": 3.192080020904541 }, { "id": "identity_168922", "image": "IconQA/images/27904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each section represents a day of the week, how many days are orange?" }, { "from": "gpt", "value": "The answer is 3 days" } ], "clip_score": 34.65625, "loss": 2.460646152496338 }, { "id": "identity_5578", "image": "MapQA/images/map_1641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in the MidWest ?" }, { "from": "gpt", "value": "The answer is 706788-713179" } ], "clip_score": 26.078125, "loss": 2.850029945373535 }, { "id": "identity_282826", "image": "DVQA/images/bar_train_00110349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of the mother in the signal less than the value of the list in the feed?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 26.328125, "loss": 3.911184310913086 }, { "id": "identity_333163", "image": "DocVQA/images/mggn0226_40.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the device?" }, { "from": "gpt", "value": "The answer is DePuy Pinnacle Metal-On-Metal Acetabular Cup Liners" } ], "clip_score": 25.171875, "loss": 1.7837640047073364 }, { "id": "identity_260364", "image": "ChartQA/images/OECD_CHILD_VACCINATION_RATES_BRA_COL_CRI_DNK_EST_000016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average vaccination rate of the countries excluding the one with the highest rate?" }, { "from": "gpt", "value": "The answer is 94.5" } ], "clip_score": 23.109375, "loss": 2.2306108474731445 }, { "id": "identity_82407", "image": "Geometry3K/images/1681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of side AE in trapezoid AEFG" }, { "from": "gpt", "value": "The answer is 156/7" } ], "clip_score": 36.09375, "loss": 2.757636308670044 }, { "id": "identity_101593", "image": "TabMWP/images/2276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which elementary school has the highest number of swings?" }, { "from": "gpt", "value": "The answer is Valley Elementary" } ], "clip_score": 40.1875, "loss": 2.1385722160339355 }, { "id": "identity_282069", "image": "GeoQA+/images/748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If a ruler and a right-angled triangle board with a 30° angle are placed as shown in the figure, and ∠1 is 34°, what is the degree measure of ∠2?\nChoices:\n(A) 114°\n(B) 124°\n(C) 116°\n(D) 126°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.046875, "loss": 2.506166934967041 }, { "id": "identity_338495", "image": "CLEVR-Math/images/CLEVR_train_004642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all large metallic cylinders. How many cylinders remain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.875, "loss": 2.335779905319214 }, { "id": "identity_74793", "image": "ScienceQA/images/18705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are visible in this image, excluding the highlighted one?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.859375, "loss": 2.4942028522491455 }, { "id": "identity_60036", "image": "FigureQA/images/94945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lowest Medium Mint dot below the lowest Teal dot on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.046875, "loss": 3.401742935180664 }, { "id": "identity_124502", "image": "A-OKVQA/images/451324.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any mobile phones visible on the table?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.296875, "loss": 3.1301991939544678 }, { "id": "identity_64106", "image": "ScienceQA/images/5173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of habitat does this bird likely reside in based on its appearance?" }, { "from": "gpt", "value": "The answer is Woodland or forested areas" } ], "clip_score": 27.546875, "loss": 2.4978504180908203 }, { "id": "identity_164802", "image": "GeoQA+/images/638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the height of the ladder against the wall to the distance from the wall to the bottom of the ladder after it slides down 4m?" }, { "from": "gpt", "value": "The answer is ratio = 21m / sqrt((25m)^2 - (21m)^2)" } ], "clip_score": 27.546875, "loss": 2.0814807415008545 }, { "id": "identity_9387", "image": "DVQA/images/bar_train_00051944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is band" } ], "clip_score": 18.25, "loss": 4.0882248878479 }, { "id": "identity_315882", "image": "DVQA/images/bar_train_00117237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth group of bars?" }, { "from": "gpt", "value": "The answer is estate" } ], "clip_score": 19.859375, "loss": 5.056181907653809 }, { "id": "identity_261293", "image": "TextVQA/images/ef0550bc74e9c8d1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of beverage company is acting as a sponsor?" }, { "from": "gpt", "value": "The answer is shiner beers" } ], "clip_score": 13.171875, "loss": 2.4294917583465576 }, { "id": "identity_132143", "image": "Super-CLEVR/images/superCLEVR_new_001924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 21.359375, "loss": 2.8750104904174805 }, { "id": "identity_25508", "image": "PMC-VQA/images/PMC8201921_Fig2_467736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body did the medical imaging focus on? \nChoices:\n(A) The heart \n(B) The brain \n(C) The lungs \n(D) The liver " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.375, "loss": 4.667037010192871 }, { "id": "identity_139361", "image": "PlotQA/images/20994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are plotted on the graph?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.34375, "loss": 2.53674578666687 }, { "id": "identity_135323", "image": "DocVQA/images/rzld0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the invoice number?" }, { "from": "gpt", "value": "The answer is 7093" } ], "clip_score": 25.546875, "loss": 2.770415782928467 }, { "id": "identity_200744", "image": "A-OKVQA/images/392851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the traffic lights in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 10.7890625, "loss": 3.0446200370788574 }, { "id": "identity_91484", "image": "Geometry3K/images/524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle GSH in circle J?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 35.96875, "loss": 2.239889621734619 }, { "id": "identity_279626", "image": "AI2D/images/2967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the leaf connected to the stem?\nChoices:\n(A) Blade\n(B) Petiole\n(C) Bud\n(D) Leaflet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.375, "loss": 3.8188540935516357 }, { "id": "identity_185716", "image": "PMC-VQA/images/PMC7876301_F2_365894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the two brain images in the top row from the same individual?" }, { "from": "gpt", "value": "The answer is Yes, they are likely from the same individual" } ], "clip_score": 27.28125, "loss": 2.275721549987793 }, { "id": "identity_206133", "image": "TextVQA/images/2454148f94e521ce.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the text \"dig\" in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 13.6171875, "loss": 3.0578978061676025 }, { "id": "identity_20141", "image": "ScienceQA/images/20615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these magnets attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.234375, "loss": 3.587799072265625 }, { "id": "identity_125384", "image": "TextVQA/images/7bf2efb3f6d4c96a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the cups?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 27.28125, "loss": 3.7753543853759766 }, { "id": "identity_125816", "image": "Super-CLEVR/images/superCLEVR_new_000831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the brown bicycle in relation to the red bus?" }, { "from": "gpt", "value": "The answer is To the right and slightly in front" } ], "clip_score": 30.21875, "loss": 3.044687032699585 }, { "id": "identity_254509", "image": "GeoQA+/images/5613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,为了测量油桶内油面的高度,将一根细木棒自油桶边缘的小孔插入桶内,测得木棒插入部分的长为100cm,木棒上沾油部分的长为60cm,桶高为80cm,如果油桶的直径为50cm,那么油桶内油面的体积是多少立方厘米?" }, { "from": "gpt", "value": "The answer is 25000π cm³" } ], "clip_score": 20.75, "loss": 2.2479934692382812 }, { "id": "identity_140776", "image": "DocVQA/images/tsyd0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year does this business plan pertain to?" }, { "from": "gpt", "value": "The answer is 1995" } ], "clip_score": 31.34375, "loss": 1.525012493133545 }, { "id": "identity_139540", "image": "DVQA/images/bar_train_00145605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between \"victim\" and \"hell\" in the \"latter\" store?" }, { "from": "gpt", "value": "The answer is 2.5 units" } ], "clip_score": 27.65625, "loss": 1.8898285627365112 }, { "id": "identity_173622", "image": "TabMWP/images/11947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much would it cost to buy one of each item listed?" }, { "from": "gpt", "value": "The answer is $116" } ], "clip_score": 28.21875, "loss": 2.2559540271759033 }, { "id": "identity_34775", "image": "A-OKVQA/images/562084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are all the umbrellas there?\nChoices:\n(A) keep dry\n(B) for sale\n(C) left there\n(D) sun protection" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.703125, "loss": 3.981205463409424 }, { "id": "identity_328301", "image": "TextVQA/images/980c3b567809621d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this place?" }, { "from": "gpt", "value": "The answer is publix" } ], "clip_score": 21.9375, "loss": 4.600147247314453 }, { "id": "identity_194553", "image": "PMC-VQA/images/PMC8102829_F4_435054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is likely used to capture this image?" }, { "from": "gpt", "value": "The answer is Fluorescence microscopy" } ], "clip_score": 28.734375, "loss": 1.41127347946167 }, { "id": "identity_65313", "image": "TQA/images/question_images/human_system_respiratory_6203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the alveoli in the respiratory system?" }, { "from": "gpt", "value": "The answer is Gas exchange between air and blood" } ], "clip_score": 29.40625, "loss": 2.595427989959717 }, { "id": "identity_241526", "image": "ScienceQA/images/3615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the primary producer and the tertiary consumer in this food web" }, { "from": "gpt", "value": "The answer is bilberry and rough-legged hawk" } ], "clip_score": 30.484375, "loss": 2.6002609729766846 }, { "id": "identity_69923", "image": "ScienceQA/images/56.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique items are there in total on Maureen and Kendrick's want lists?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 24.546875, "loss": 2.1941187381744385 }, { "id": "identity_105367", "image": "VQA2.0/images/326484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electronic device is on the podium?" }, { "from": "gpt", "value": "The answer is Laptop" } ], "clip_score": 15.3671875, "loss": 2.9403774738311768 }, { "id": "identity_113319", "image": "PMC-VQA/images/PMC5459700_F1_175519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the yellow lines in the image?" }, { "from": "gpt", "value": "The answer is They represent measurements of anatomical distances" } ], "clip_score": 22.984375, "loss": 3.263035774230957 }, { "id": "identity_271392", "image": "FigureQA/images/87812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the least valuable option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.3125, "loss": 3.9170384407043457 }, { "id": "identity_179283", "image": "CLEVR-Math/images/CLEVR_train_002348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither cube nor cylinder?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.09375, "loss": 2.5070745944976807 }, { "id": "identity_184656", "image": "PMC-VQA/images/PMC5031756_Fig1_75466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is shown in the image?" }, { "from": "gpt", "value": "The answer is Specific organ or body part tissue" } ], "clip_score": 26.90625, "loss": 3.859158515930176 }, { "id": "identity_187276", "image": "PMC-VQA/images/PMC7806341_F1_342023.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Lateral view of the left side of the patient's face" } ], "clip_score": 31.859375, "loss": 2.6304943561553955 }, { "id": "identity_68633", "image": "ScienceQA/images/222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of offspring being homozygous dominant for the trait in question?" }, { "from": "gpt", "value": "The answer is 2/4 or 1/2" } ], "clip_score": 19.671875, "loss": 1.1754504442214966 }, { "id": "identity_30617", "image": "VQA-AS/images/4555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cat doing?" }, { "from": "gpt", "value": "The answer is playing" } ], "clip_score": 21.5625, "loss": 3.6283316612243652 }, { "id": "identity_97453", "image": "IconQA/images/66562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of green squares to the total number of squares?" }, { "from": "gpt", "value": "The answer is 3:16" } ], "clip_score": 24.5625, "loss": 1.7664250135421753 }, { "id": "identity_98684", "image": "TabMWP/images/2007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many offices are exactly 25 square meters?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 21.078125, "loss": 2.09407901763916 }, { "id": "identity_125630", "image": "PMC-VQA/images/PMC2894136_pntd-0000726-g005_67834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Two arrows" } ], "clip_score": 21.421875, "loss": 3.502678394317627 }, { "id": "identity_197251", "image": "A-OKVQA/images/234994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the person wearing the gear behind the batter?" }, { "from": "gpt", "value": "The answer is Catcher" } ], "clip_score": 28.703125, "loss": 2.263519048690796 }, { "id": "identity_245887", "image": "GeoQA+/images/2680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, BC是⊙O的直径, 若∠ABC=25°, ∠ACD的度数是多少?" }, { "from": "gpt", "value": "The answer is 45°" } ], "clip_score": 32.375, "loss": 1.7618255615234375 }, { "id": "identity_187840", "image": "PMC-VQA/images/PMC5467982_FIG1_178013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any mass effect or midline shift caused by the lesion in the image?" }, { "from": "gpt", "value": "The answer is No mass effect or midline shift is apparent" } ], "clip_score": 25.140625, "loss": 2.5550525188446045 }, { "id": "identity_97833", "image": "CLEVR-Math/images/CLEVR_train_029644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the tiny yellow block?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 26.71875, "loss": 2.6479885578155518 }, { "id": "identity_330323", "image": "TextVQA/images/23c36051029d2c2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is the player in this outfit?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 19.015625, "loss": 2.571791648864746 }, { "id": "identity_50169", "image": "DVQA/images/bar_train_00113261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of 'spirit' and 'union' for 'defeat'?" }, { "from": "gpt", "value": "The answer is Approximately 140" } ], "clip_score": 24.203125, "loss": 2.6968328952789307 }, { "id": "identity_296266", "image": "TabMWP/images/22228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Monica requires to purchase both a rosebush and a lawn chair?" }, { "from": "gpt", "value": "The answer is 28" } ], "clip_score": 30.828125, "loss": 2.6577327251434326 }, { "id": "identity_262170", "image": "TextVQA/images/1cda533fa02b6f8f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which day of the week is it currently?" }, { "from": "gpt", "value": "The answer is saturday" } ], "clip_score": 18.25, "loss": 2.8074522018432617 }, { "id": "identity_176966", "image": "IconQA/images/89732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.640625, "loss": 2.1119191646575928 }, { "id": "identity_7742", "image": "MapQA/images/map_7684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Colorado ?" }, { "from": "gpt", "value": "The answer is 50.2%-50.9%" } ], "clip_score": 24.5, "loss": 2.1158688068389893 }, { "id": "identity_198721", "image": "A-OKVQA/images/369221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 11.921875, "loss": 3.2959365844726562 }, { "id": "identity_4593", "image": "Super-CLEVR/images/superCLEVR_new_000696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the gray car and the cyan chopper made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.078125, "loss": 5.712092876434326 }, { "id": "identity_163547", "image": "GeoQA+/images/5445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle d?" }, { "from": "gpt", "value": "The answer is 125°" } ], "clip_score": 31.546875, "loss": 2.256967782974243 }, { "id": "identity_300712", "image": "PMC-VQA/images/PMC6356492_micromachines-10-00010-f015_430195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was this image sourced from?\nChoices:\n(A) Google Images\n(B) National Geographic\n(C) Science journal\n(D) The New York Times" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.296875, "loss": 3.8148128986358643 }, { "id": "identity_226668", "image": "GeoQA+/images/3571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABCD是⊙O的内接四边形,AD与BC的延长线交于点E,BA与CD的延长线交于点F,∠DCE=80°,∠F=25°,求∠E与∠A的和是多少度?" }, { "from": "gpt", "value": "The answer is 100° + ∠A" } ], "clip_score": 24.171875, "loss": 2.0582664012908936 }, { "id": "identity_216275", "image": "A-OKVQA/images/235237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the hotel in the background?" }, { "from": "gpt", "value": "The answer is Sandman Suites" } ], "clip_score": 17.015625, "loss": 2.936706781387329 }, { "id": "identity_278121", "image": "FigureQA/images/56344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Peru or Medium Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.109375, "loss": 4.694717884063721 }, { "id": "identity_186429", "image": "PMC-VQA/images/PMC6480864_Fig1_470600.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible fracture in the bone?" }, { "from": "gpt", "value": "The answer is No visible fracture" } ], "clip_score": 32.4375, "loss": 3.1030633449554443 }, { "id": "identity_180951", "image": "TabMWP/images/12273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more train car is added, how many students will there be in total?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 30.015625, "loss": 2.3277461528778076 }, { "id": "identity_92304", "image": "TabMWP/images/23144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average score of the group?" }, { "from": "gpt", "value": "The answer is 151.75" } ], "clip_score": 26.203125, "loss": 1.6345455646514893 }, { "id": "identity_161372", "image": "Geometry3K/images/1468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the supplementary angle of the angle formed by the transversal and line q?" }, { "from": "gpt", "value": "The answer is The supplementary angle is 123°" } ], "clip_score": 29.1875, "loss": 2.1384267807006836 }, { "id": "identity_67706", "image": "TQA/images/abc_question_images/human_system_ear_16098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label L represent in the diagram?" }, { "from": "gpt", "value": "The answer is Semicircular canals" } ], "clip_score": 23.21875, "loss": 2.3722331523895264 }, { "id": "identity_234907", "image": "UniGeo/images/3439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知AD、BC分别是⊙O的两条弦,AD∥BC,∠AOC=80°,求∠DAB和∠ABC的度数之和。" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 29.65625, "loss": 1.9072890281677246 }, { "id": "identity_62178", "image": "ChartQA/images/12194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage more is Very unfavorable than Favorable?" }, { "from": "gpt", "value": "The answer is 34%" } ], "clip_score": 31.703125, "loss": 1.6982330083847046 }, { "id": "identity_310063", "image": "PMC-VQA/images/PMC4053344_pone-0099225-g003_296901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in images B and D?\nChoices:\n(A) red chlorophyll fluorescence\n(B) functional GFP genes produced by Cas9/sgRNA-mediated mutagenesis\n(C) nonfunctional mutant GFP genes\n(D) merged image of red chlorophyll fluorescence and GFP fluorescence" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.0625, "loss": 4.109149932861328 }, { "id": "identity_326563", "image": "AI2D/images/4562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is defined as the part of a plant embryo that develops into a root?\nChoices:\n(A) cotyledonary node\n(B) radicle\n(C) old seed coat\n(D) cotyledon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.28125, "loss": 3.702665328979492 }, { "id": "identity_68217", "image": "AI2D/images/301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different food sources does the hawk have in this ecosystem?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.265625, "loss": 4.508431434631348 }, { "id": "identity_331471", "image": "GeoQA+/images/5045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,点A,B,C都在圆上,若∠C=32°,则∠AOB的度数是()\nChoices:\n(A) 32°\n(B) 60°\n(C) 64°\n(D) 72°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.3125, "loss": 2.7377350330352783 }, { "id": "identity_4018", "image": "TQA/images/question_images/human_system_digestive_3783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is solid waste created?\nChoices:\n(A) Large intestine\n(B) Small intestine\n(C) Stomach\n(D) Appendix" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.0625, "loss": 3.740891218185425 }, { "id": "identity_111144", "image": "PMC-VQA/images/PMC5775193_Fig1_265878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible markers or annotations on the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.4375, "loss": 3.200618267059326 }, { "id": "identity_326206", "image": "A-OKVQA/images/146397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many individuals are partially skating on one wheel?\nChoices:\n(A) two\n(B) one\n(C) seven\n(D) four" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.6875, "loss": 3.432074785232544 }, { "id": "identity_138619", "image": "PlotQA/images/49122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region has the lowest proportion of economically active population in 1995?" }, { "from": "gpt", "value": "The answer is Kiribati" } ], "clip_score": 32.03125, "loss": 4.016988754272461 }, { "id": "identity_281978", "image": "A-OKVQA/images/116358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During the game, what is the specific time of day being depicted among the following options: (A) midnight, (B) twilight, (C) dusk, or (D) dawn?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6328125, "loss": 4.4404425621032715 }, { "id": "identity_264042", "image": "A-OKVQA/images/136699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which demographic is most likely to own the items listed above?\nChoices:\n(A) child\n(B) grand father\n(C) female\n(D) male" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.1328125, "loss": 4.010149955749512 }, { "id": "identity_287090", "image": "TabMWP/images/35204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price difference between a slice of cherry pie and a bowl of clam chowder?" }, { "from": "gpt", "value": "The answer is 0.82" } ], "clip_score": 40.0, "loss": 2.4683306217193604 }, { "id": "identity_249747", "image": "CLEVR-Math/images/CLEVR_train_064964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are left if you remove all the spherical objects and then subtract 1 brown ball?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.515625, "loss": 2.2081305980682373 }, { "id": "identity_53496", "image": "PlotQA/images/137098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country has the lowest fuel imports in 1965?" }, { "from": "gpt", "value": "The answer is Sweden" } ], "clip_score": 24.3125, "loss": 3.021864891052246 }, { "id": "identity_95340", "image": "TabMWP/images/18418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of classrooms per school?" }, { "from": "gpt", "value": "The answer is 40 classrooms" } ], "clip_score": 27.75, "loss": 2.3490633964538574 }, { "id": "identity_142896", "image": "ChartQA/images/two_col_60678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years show an increase in data value?" }, { "from": "gpt", "value": "The answer is 2 years" } ], "clip_score": 24.703125, "loss": 2.6253063678741455 }, { "id": "identity_105004", "image": "Super-CLEVR/images/superCLEVR_new_000579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles that are the same color as the train?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.484375, "loss": 3.5347800254821777 }, { "id": "identity_30335", "image": "DVQA/images/bar_train_00066582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the smallest summed value?" }, { "from": "gpt", "value": "The answer is mad" } ], "clip_score": 21.953125, "loss": 3.82104754447937 }, { "id": "identity_188144", "image": "A-OKVQA/images/408528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.796875, "loss": 4.405293941497803 }, { "id": "identity_276004", "image": "ScienceQA/images/19463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Maryland?\nChoices:\n(A) Phoenix\n(B) Manchester\n(C) Annapolis\n(D) Dover" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.390625, "loss": 4.15746545791626 }, { "id": "identity_106600", "image": "VizWiz/images/VizWiz_train_00015479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the multi-tool in the image?" }, { "from": "gpt", "value": "The answer is Leatherman" } ], "clip_score": 32.625, "loss": 2.23772931098938 }, { "id": "identity_84352", "image": "UniGeo/images/3112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle B'AD after the rotation?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 30.0625, "loss": 1.619775414466858 }, { "id": "identity_274616", "image": "FigureQA/images/10333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.515625, "loss": 4.492938041687012 }, { "id": "identity_296103", "image": "FigureQA/images/13325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Sky Blue or Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.84375, "loss": 4.625658988952637 }, { "id": "identity_327220", "image": "A-OKVQA/images/528713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is going to happen?\nChoices:\n(A) horse parade\n(B) horse racing\n(C) horse trading\n(D) horse show" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.109375, "loss": 4.236560821533203 }, { "id": "identity_26531", "image": "TabMWP/images/8455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a deer postcard cost than a rose stamp?" }, { "from": "gpt", "value": "The answer is 0.40" } ], "clip_score": 32.03125, "loss": 1.981170415878296 }, { "id": "identity_249421", "image": "GeoQA+/images/5325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知在⊙O中,CD是⊙O的直径,点A、B在⊙O上,且AC=AB,若∠BCD=26°,求∠AOB的度数。" }, { "from": "gpt", "value": "The answer is 308°" } ], "clip_score": 28.25, "loss": 2.489061117172241 }, { "id": "identity_291947", "image": "DVQA/images/bar_train_00105570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which stack of bars is labeled as the second from the bottom?" }, { "from": "gpt", "value": "The answer is start" } ], "clip_score": 25.390625, "loss": 3.8338680267333984 }, { "id": "identity_166355", "image": "TabMWP/images/20576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of pages per book?" }, { "from": "gpt", "value": "The answer is 82 and 84" } ], "clip_score": 32.5625, "loss": 3.050851345062256 }, { "id": "identity_287744", "image": "TabMWP/images/13672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of computers counted by the teachers in the classrooms of a middle school?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 36.5625, "loss": 3.1099417209625244 }, { "id": "identity_277606", "image": "TabMWP/images/18848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the survey conducted by a reporter to investigate the correlation between the economy and movie attendance, people were asked about the number of movies they watched last month. How many individuals reported going to the movies more than once?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 33.40625, "loss": 2.218459367752075 }, { "id": "identity_56917", "image": "DVQA/images/bar_train_00011611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the least preference among people according to the chart?" }, { "from": "gpt", "value": "The answer is stick" } ], "clip_score": 30.421875, "loss": 3.096057415008545 }, { "id": "identity_141818", "image": "DocVQA/images/ptvp0225_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of Direct Mail Packs for all three launches combined?" }, { "from": "gpt", "value": "The answer is 2.62MM" } ], "clip_score": 22.390625, "loss": 2.687079429626465 }, { "id": "identity_335206", "image": "PMC-VQA/images/PMC7843582_Fig1_355429.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the two probabilistic tractography maps superimposed on this image?\nChoices:\n(A) Medial Lemniscus and Brainstem \n(B) Inferior Longitudinal Fasciculus and the Cingulum \n(C) Inferior Longitudinal Fasciculus and Inferior Fronto-Occipital Fasciculus \n(D) Corpus Callosum and Uncinate Fasciculus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.09375, "loss": 3.6875476837158203 }, { "id": "identity_276943", "image": "PMC-VQA/images/PMC4151864_fig5_317318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the pattern observed in the image, what was the initial suspicion regarding the condition shown?\nChoices:\n(A) Benignity \n(B) Infection \n(C) Malignancy \n(D) None of the Above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.296875, "loss": 4.023498058319092 }, { "id": "identity_125498", "image": "VQA-RAD/images/synpic46764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely orientation of the patient's head during the acquisition of this image?" }, { "from": "gpt", "value": "The answer is Eyes facing upwards, parallel to the ground" } ], "clip_score": 30.828125, "loss": 3.24214506149292 }, { "id": "identity_257792", "image": "A-OKVQA/images/279763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are being flown in the image, and what is the color of the person who controls the second most number of kites?" }, { "from": "gpt", "value": "The answer is none" } ], "clip_score": 25.171875, "loss": 4.5948166847229 }, { "id": "identity_46902", "image": "DocVQA/images/mfvf0023_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the Law Department mentioned in the document?" }, { "from": "gpt", "value": "The answer is Winston-Salem, North Carolina 27102" } ], "clip_score": 23.796875, "loss": 1.6161140203475952 }, { "id": "identity_112538", "image": "VQA-RAD/images/synpic19853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of renal stones in the right kidney?" }, { "from": "gpt", "value": "The answer is No evidence of renal stones in the right kidney" } ], "clip_score": 25.734375, "loss": 1.891736626625061 }, { "id": "identity_213566", "image": "VQA2.0/images/427914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animals are the larger ones in the image?" }, { "from": "gpt", "value": "The answer is Elephants" } ], "clip_score": 24.0625, "loss": 2.989417314529419 }, { "id": "identity_129513", "image": "VQA-AS/images/19384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct shadows are cast on the ground?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.421875, "loss": 4.758747577667236 }, { "id": "identity_23767", "image": "Geometry3K/images/291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the measure of \\angle A. Round to the nearest degree.\nChoices:\n(A) 64\n(B) 69\n(C) 74\n(D) 79" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.796875, "loss": 2.9452009201049805 }, { "id": "identity_71242", "image": "AI2D/images/1683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During which phase of the moon would an eclipse like the one shown in the diagram occur?" }, { "from": "gpt", "value": "The answer is Full Moon" } ], "clip_score": 29.453125, "loss": 2.5082452297210693 }, { "id": "identity_255953", "image": "IconQA/images/34375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of points on all the stars combined?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 28.484375, "loss": 1.6390596628189087 }, { "id": "identity_216944", "image": "A-OKVQA/images/210052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the rider wearing on his feet?" }, { "from": "gpt", "value": "The answer is Black boots" } ], "clip_score": 29.3125, "loss": 2.973818063735962 }, { "id": "identity_277685", "image": "VizWiz/images/VizWiz_train_00006572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this?" }, { "from": "gpt", "value": "The answer is red white" } ], "clip_score": 24.375, "loss": 4.1012043952941895 }, { "id": "identity_78733", "image": "AI2D/images/3265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"mitochondrion\"?" }, { "from": "gpt", "value": "The answer is It generates the cell's energy" } ], "clip_score": 25.125, "loss": 2.8792290687561035 }, { "id": "identity_170835", "image": "TabMWP/images/37390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of purchasing one of each book?" }, { "from": "gpt", "value": "The answer is $73.00" } ], "clip_score": 33.59375, "loss": 1.6208248138427734 }, { "id": "identity_21230", "image": "PlotQA/images/147765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does \"2001\" appear as one of the legend labels in the graph ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.890625, "loss": 3.1890900135040283 }, { "id": "identity_106250", "image": "PMC-VQA/images/PMC4989014_Fig5_63839.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any abnormality visible in the brain?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.8125, "loss": 2.736665725708008 }, { "id": "identity_258751", "image": "A-OKVQA/images/17272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do the umbrellas contribute to the comfort of the people on the beach, considering the environmental conditions and the activities they are engaged in?" }, { "from": "gpt", "value": "The answer is Provide shade and reduce heat exposure for comfort" } ], "clip_score": 32.28125, "loss": 3.20406436920166 }, { "id": "identity_52642", "image": "FigureQA/images/64962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value recorded for Dark Seafoam on the graph?" }, { "from": "gpt", "value": "The answer is Approximately 76" } ], "clip_score": 26.96875, "loss": 2.9296040534973145 }, { "id": "identity_22882", "image": "TextVQA/images/3f4e503184a49931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the number of the taxi on the left?" }, { "from": "gpt", "value": "The answer is 4k72" } ], "clip_score": 26.671875, "loss": 2.1683666706085205 }, { "id": "identity_124046", "image": "TextVQA/images/01d7263395d9a493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the frame of the photo on the shelf below the TV?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 25.515625, "loss": 3.990191698074341 }, { "id": "identity_209171", "image": "A-OKVQA/images/156685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the boat on the far left?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.28125, "loss": 3.688814640045166 }, { "id": "identity_18659", "image": "DocVQA/images/lzlh0077_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the company mentioned ?" }, { "from": "gpt", "value": "The answer is R.J. Reynolds Tobacco Company" } ], "clip_score": 19.25, "loss": 1.7151802778244019 }, { "id": "identity_200987", "image": "A-OKVQA/images/35726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the boat's railing?" }, { "from": "gpt", "value": "The answer is Vertical stripes" } ], "clip_score": 26.609375, "loss": 2.629899263381958 }, { "id": "identity_229481", "image": "PMC-VQA/images/PMC9348820_F1_363019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the vascular pruning events between the control (Ctrl) and the experimental (Sf1ΔiECKO) groups in the confocal images. How do the differences in vascular pruning events between these groups reflect on the overall vascular network structure?" }, { "from": "gpt", "value": "The answer is Experimental group shows more pruning, leading to less dense network" } ], "clip_score": 32.6875, "loss": 2.8559134006500244 }, { "id": "identity_219078", "image": "TextVQA/images/1c1e6212320b3c5d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the jersey of the player running to home plate?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 26.375, "loss": 2.1099936962127686 }, { "id": "identity_194545", "image": "PMC-VQA/images/PMC4330230_Fig7_358755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence is this image from?" }, { "from": "gpt", "value": "The answer is T2-weighted sequence" } ], "clip_score": 30.109375, "loss": 1.9761416912078857 }, { "id": "identity_46351", "image": "FigureQA/images/34367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the largest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Periwinkle" } ], "clip_score": 35.78125, "loss": 2.2591958045959473 }, { "id": "identity_290441", "image": "IconQA/images/60027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the line in centimeters when measured to the nearest centimeter using the ruler?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 32.3125, "loss": 2.7481861114501953 }, { "id": "identity_131348", "image": "TextVQA/images/521b4dce31008459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the theatre shown in the image?" }, { "from": "gpt", "value": "The answer is Princess Theatre" } ], "clip_score": 23.640625, "loss": 3.637083053588867 }, { "id": "identity_307092", "image": "PMC-VQA/images/PMC3065903_fig1_91315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain shows increased RCBF?\nChoices:\n(A) Posterior cingulate cortex\n(B) Medial temporal lobe\n(C) Cerebellum\n(D) Amygdala" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.8125, "loss": 3.665372133255005 }, { "id": "identity_282072", "image": "ScienceQA/images/9700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these two objects share among the following choices: scratchy or bouncy?\nChoices:\n(A) scratchy\n(B) bouncy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.609375, "loss": 4.2051191329956055 }, { "id": "identity_37735", "image": "ChartQA/images/multi_col_100653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the ratio of largest value of light blue and gray graph (in A:B)?" }, { "from": "gpt", "value": "The answer is 1.023809524" } ], "clip_score": 23.859375, "loss": 2.4497909545898438 }, { "id": "identity_111698", "image": "VQA-AS/images/809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plant is in the pot?" }, { "from": "gpt", "value": "The answer is A potted bush" } ], "clip_score": 18.25, "loss": 3.774181604385376 }, { "id": "identity_20379", "image": "PMC-VQA/images/PMC7718624_j_tnsci-2020-0145_fig_003_315715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in the image? \nChoices:\n(A) Calcification of the bone tissue \n(B) Calcification of the epidural space \n(C) Calcification of the brain tissue \n(D) Calcification of the dura mater. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.265625, "loss": 4.213352680206299 }, { "id": "identity_257198", "image": "Geometry3K/images/1358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that \\( m \\angle 11 = 62^\\circ \\) and \\( m \\angle 14 = 38^\\circ \\), find the measure of \\( \\angle 2 \\)" }, { "from": "gpt", "value": "The answer is 62" } ], "clip_score": 26.234375, "loss": 2.6145694255828857 }, { "id": "identity_218991", "image": "A-OKVQA/images/346026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pigeons can be seen on the ground?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.453125, "loss": 2.4868967533111572 }, { "id": "identity_54795", "image": "FigureQA/images/23841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Medium Purple section in the pie chart?" }, { "from": "gpt", "value": "The answer is 66.67%" } ], "clip_score": 39.25, "loss": 1.3237652778625488 }, { "id": "identity_25291", "image": "IconQA/images/25769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a square with this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.453125, "loss": 4.116363525390625 }, { "id": "identity_83725", "image": "GeoQA+/images/3586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle A in the right triangle ABC?" }, { "from": "gpt", "value": "The answer is 22.62 degrees" } ], "clip_score": 32.875, "loss": 2.3942439556121826 }, { "id": "identity_331691", "image": "TQA/images/abc_question_images/human_system_excretory_16110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the kidneys?\nChoices:\n(A) V\n(B) X\n(C) Y\n(D) S" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.5, "loss": 4.203344821929932 }, { "id": "identity_234113", "image": "ScienceQA/images/13519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Emmy's test shows that adding a specific nutrient helps bacteria produce 20% more insulin, how would you design an experiment to determine the optimal concentration of this nutrient for maximum insulin production, and what would be the expected outcome if the optimal concentration is found?" }, { "from": "gpt", "value": "The answer is The optimal concentration of the nutrient for maximum insulin production" } ], "clip_score": 23.75, "loss": 2.0225305557250977 }, { "id": "identity_74364", "image": "ScienceQA/images/2819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What magnetic force is acting between the two magnets in the image?" }, { "from": "gpt", "value": "The answer is Attraction" } ], "clip_score": 20.265625, "loss": 2.3766331672668457 }, { "id": "identity_177889", "image": "TabMWP/images/13163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a recipe calls for 2 pounds of spaghetti squash and 1 pound of pumpkin, how much would the ingredients cost?" }, { "from": "gpt", "value": "The answer is $7" } ], "clip_score": 35.125, "loss": 2.859327554702759 }, { "id": "identity_152044", "image": "ScienceQA/images/5127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states are highlighted on this map?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.0625, "loss": 2.4890267848968506 }, { "id": "identity_102817", "image": "CLEVR-Math/images/CLEVR_train_069010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are smaller than the red cube?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.765625, "loss": 2.673203706741333 }, { "id": "identity_300398", "image": "UniGeo/images/498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 用半径为30cm、圆心角为120°的扇形做成一个圆锥的侧面,那么圆锥底面的半径是多少?\n选项:\n(A) 10cm\n(B) 20cm\n(C) 10πcm\n(D) 20πcm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.5625, "loss": 2.8193774223327637 }, { "id": "identity_311785", "image": "TQA/images/question_images/earth_eclipses_1668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens during a solar eclipse?\nChoices:\n(A) The sun is between the moon and the Earth\n(B) The moon is next to the sun next to the Earth\n(C) The moon stays in the middle between the sun and the Earth\n(D) The Earth is between the moon and the sun" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.265625, "loss": 4.573644638061523 }, { "id": "identity_333289", "image": "DocVQA/images/yhgk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the P.O. Box number mentioned in this document?" }, { "from": "gpt", "value": "The answer is 1734" } ], "clip_score": 19.03125, "loss": 2.5529980659484863 }, { "id": "identity_145634", "image": "DocVQA/images/rhhx0023_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the invoice?" }, { "from": "gpt", "value": "The answer is 4/14/98" } ], "clip_score": 24.46875, "loss": 2.6364357471466064 }, { "id": "identity_87812", "image": "UniGeo/images/941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠ACB = 20°, what is the area of the sector formed by ∠AOB?" }, { "from": "gpt", "value": "The answer is (1/9) * π * r^2 (where r is the radius of the circle)" } ], "clip_score": 35.0625, "loss": 1.2268418073654175 }, { "id": "identity_91390", "image": "Geometry3K/images/1425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the figure?" }, { "from": "gpt", "value": "The answer is 34" } ], "clip_score": 28.640625, "loss": 2.6351561546325684 }, { "id": "identity_166943", "image": "TabMWP/images/13689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pizzas delivered over the past 6 days?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 34.9375, "loss": 1.994655966758728 }, { "id": "identity_4284", "image": "Super-CLEVR/images/superCLEVR_new_001442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a small object that is the same color as the large rubber minivan; what is its material?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 27.28125, "loss": 2.43796968460083 }, { "id": "identity_93678", "image": "TabMWP/images/3507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of science articles read by the students?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 36.46875, "loss": 2.5660226345062256 }, { "id": "identity_48900", "image": "DVQA/images/bar_train_00191642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the 'sign' algorithm on the 'title' dataset?" }, { "from": "gpt", "value": "The answer is Approximately 60" } ], "clip_score": 31.71875, "loss": 2.6454219818115234 }, { "id": "identity_1431", "image": "Super-CLEVR/images/superCLEVR_new_000026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the green bicycle?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 22.515625, "loss": 4.0994462966918945 }, { "id": "identity_42703", "image": "FigureQA/images/59403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Violet Red in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 20%" } ], "clip_score": 40.5625, "loss": 2.064622640609741 }, { "id": "identity_11516", "image": "DocVQA/images/lffn0226_48.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does Figure 14 in this document describe?" }, { "from": "gpt", "value": "The answer is Looking down on to the greenhouse roof showing three cloth infuser bags inflated with clean air." } ], "clip_score": 22.5625, "loss": 3.3401379585266113 }, { "id": "identity_29124", "image": "TabMWP/images/1917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A farm equipment company kept a record of the number of tractors made each month. In which month did the company make the fewest tractors?\nChoices:\n(A) June\n(B) July\n(C) August\n(D) September" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.28125, "loss": 3.524024248123169 }, { "id": "identity_56649", "image": "FigureQA/images/73619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Medium Turquoise on the x-axis?" }, { "from": "gpt", "value": "The answer is 100 - 0 = 100" } ], "clip_score": 30.0, "loss": 1.9131646156311035 }, { "id": "identity_151673", "image": "ScienceQA/images/972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the object in the image conduct electricity?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.1796875, "loss": 2.999204158782959 }, { "id": "identity_28236", "image": "ScienceQA/images/5688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Select the bird below.\nChoices:\n(A) mandarinfish\n(B) American bullfrog\n(C) emerald hummingbird\n(D) California toad" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0703125, "loss": 4.2103400230407715 }, { "id": "identity_75519", "image": "TQA/images/question_images/parts_neuron_7224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the diagram, where do the terminal buttons form junctions?" }, { "from": "gpt", "value": "The answer is They form junctions with other cells" } ], "clip_score": 19.03125, "loss": 1.9138559103012085 }, { "id": "identity_4845", "image": "PMC-VQA/images/PMC7847589_Fig2_356631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the post-contrast MRI show?\nChoices:\n(A) Calcified dot \n(B) Peripheral edema \n(C) Restricted diffusion \n(D) Garland-like enhancement pattern " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.578125, "loss": 3.6988799571990967 }, { "id": "identity_216592", "image": "VQA2.0/images/450833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any spectators in the background?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.5625, "loss": 2.9853224754333496 }, { "id": "identity_323857", "image": "A-OKVQA/images/564589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many grown-ups are there in the picture?\nChoices:\n(A) eight\n(B) five\n(C) nine\n(D) two" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.5078125, "loss": 3.7017102241516113 }, { "id": "identity_329795", "image": "DocVQA/images/qqmf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who worked on Hypertension and Human Health: The Salt Question?" }, { "from": "gpt", "value": "The answer is Edward R. ricciuti" } ], "clip_score": 21.203125, "loss": 3.3010852336883545 }, { "id": "identity_85750", "image": "GeoQA+/images/260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the height of point C above the ground when the seat is adjusted to a height of 0.9m?" }, { "from": "gpt", "value": "The answer is 20cm" } ], "clip_score": 29.390625, "loss": 2.7214410305023193 }, { "id": "identity_307088", "image": "Geometry3K/images/905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Triangle J K L is circumscribed about a circle. Find x.\nChoices:\n(A) 3\n(B) 4\n(C) 7\n(D) 9" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.875, "loss": 2.3612492084503174 }, { "id": "identity_147128", "image": "DocVQA/images/lmpv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book mentioned in the document?" }, { "from": "gpt", "value": "The answer is The Beet Sugar Story" } ], "clip_score": 20.984375, "loss": 2.292107105255127 }, { "id": "identity_130241", "image": "Super-CLEVR/images/superCLEVR_new_000251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the teal car have its front facing the camera?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.5625, "loss": 3.438232421875 }, { "id": "identity_157565", "image": "AI2D/images/1116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers of mesophyll are shown in this cross-section of a leaf?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 36.4375, "loss": 4.276896953582764 }, { "id": "identity_215681", "image": "A-OKVQA/images/455881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the horse's tail tied up or left loose?" }, { "from": "gpt", "value": "The answer is Tied up" } ], "clip_score": 19.4375, "loss": 2.0749740600585938 }, { "id": "identity_84910", "image": "GeoQA+/images/1083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABC in the given triangle ABC?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 35.03125, "loss": 2.0626752376556396 }, { "id": "identity_328598", "image": "DVQA/images/bar_train_00113823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 19.671875, "loss": 3.6986141204833984 }, { "id": "identity_88976", "image": "UniGeo/images/3328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if angle B is 60 degrees and angle C is 70 degrees, and circle O has AB as its diameter, what is the measure of angle OCB?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 33.4375, "loss": 1.538982629776001 }, { "id": "identity_34949", "image": "DVQA/images/bar_train_00112888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the lowest accuracy?" }, { "from": "gpt", "value": "The answer is means" } ], "clip_score": 33.1875, "loss": 4.824815273284912 }, { "id": "identity_213972", "image": "TextVQA/images/73dd9803b9f1b96b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color scheme of the book cover?" }, { "from": "gpt", "value": "The answer is Red, white, and blue" } ], "clip_score": 25.46875, "loss": 2.18607759475708 }, { "id": "identity_114311", "image": "PMC-VQA/images/PMC6356492_micromachines-10-00010-f015_430195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electron microscope was used based on the image details?" }, { "from": "gpt", "value": "The answer is Scanning Electron Microscope" } ], "clip_score": 28.234375, "loss": 1.7419536113739014 }, { "id": "identity_121188", "image": "Super-CLEVR/images/superCLEVR_new_000895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle on the left side of the image?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 28.546875, "loss": 3.382999897003174 }, { "id": "identity_312328", "image": "AI2D/images/1998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the food web shown, which of the following can be classified as Producers?\nChoices:\n(A) Nematodes\n(B) Bacteria\n(C) Protozoa\n(D) Plants" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.796875, "loss": 3.247591018676758 }, { "id": "identity_210569", "image": "PMC-VQA/images/PMC3971497_fig2_277808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is labeled with \"GB\" in the image?" }, { "from": "gpt", "value": "The answer is Gallbladder" } ], "clip_score": 26.640625, "loss": 2.577592372894287 }, { "id": "identity_7893", "image": "AI2D/images/2973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents the coronal hole?\nChoices:\n(A) i\n(B) f\n(C) h\n(D) k" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.546875, "loss": 3.844423770904541 }, { "id": "identity_280508", "image": "IconQA/images/18214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence, given that the first picture is a rocket?\nChoices:\n(A) moon\n(B) star\n(C) rocket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.546875, "loss": 4.509746074676514 }, { "id": "identity_235788", "image": "Super-CLEVR/images/superCLEVR_new_000698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are to the right of the cyan object, and what are their colors?" }, { "from": "gpt", "value": "The answer is Two objects, red and purple" } ], "clip_score": 19.921875, "loss": 3.1694421768188477 }, { "id": "identity_108905", "image": "TextVQA/images/ef7e36e7e5882f9e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accent color on the backpack?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 27.078125, "loss": 3.1836588382720947 }, { "id": "identity_258443", "image": "CLEVR-Math/images/CLEVR_train_005363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many matte blocks are there in total, and how many shiny blocks are there in total?" }, { "from": "gpt", "value": "The answer is 5 matte blocks, 4 shiny blocks" } ], "clip_score": 25.40625, "loss": 1.6679251194000244 }, { "id": "identity_214802", "image": "VQA2.0/images/385186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the puppy's tail?" }, { "from": "gpt", "value": "The answer is Upwards" } ], "clip_score": 25.296875, "loss": 4.094529151916504 }, { "id": "identity_67645", "image": "ScienceQA/images/18481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the magnets were to move, in which direction would the left magnet likely move?" }, { "from": "gpt", "value": "The answer is To the left" } ], "clip_score": 18.28125, "loss": 3.1932790279388428 }, { "id": "identity_281128", "image": "DVQA/images/bar_train_00154018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all the stores, which item had the highest total number of units sold?" }, { "from": "gpt", "value": "The answer is angle" } ], "clip_score": 26.484375, "loss": 3.409050703048706 }, { "id": "identity_321670", "image": "Geometry3K/images/1783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x in the diagram.\nChoices:\n(A) 15\n(B) 34\n(C) 39.5\n(D) 142" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.75, "loss": 3.352147340774536 }, { "id": "identity_255593", "image": "DocVQA/images/ytxn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the fax numbers for both the sender and the recipient, and what is the sum of these fax numbers?" }, { "from": "gpt", "value": "The answer is 12133" } ], "clip_score": 20.15625, "loss": 3.2268593311309814 }, { "id": "identity_318350", "image": "UniGeo/images/356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在⊙O,AB为直径,C为上一点,若∠CAB=23°,则∠ABC的度数为()\nChoices:\n(A) 23°\n(B) 46°\n(C) 57°\n(D) 67°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.984375, "loss": 2.7163784503936768 }, { "id": "identity_91628", "image": "Geometry3K/images/81.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment BB'?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.21875, "loss": 2.229755163192749 }, { "id": "identity_254138", "image": "IconQA/images/73296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green circles are there in the image?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 30.71875, "loss": 2.126617431640625 }, { "id": "identity_138762", "image": "PlotQA/images/58443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of borrowers involved with commercial banks in Pakistan over the years shown?" }, { "from": "gpt", "value": "The answer is Approximately 200" } ], "clip_score": 26.328125, "loss": 2.6405162811279297 }, { "id": "identity_125647", "image": "VQA-AS/images/11085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the person's pants?" }, { "from": "gpt", "value": "The answer is Black and white striped" } ], "clip_score": 15.2734375, "loss": 3.86975359916687 }, { "id": "identity_292586", "image": "IconQA/images/24866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of fish present?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 22.84375, "loss": 1.9719911813735962 }, { "id": "identity_331290", "image": "VQA-AS/images/13787.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people in this scene?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 19.828125, "loss": 3.4588191509246826 }, { "id": "identity_5060", "image": "DVQA/images/bar_train_00053922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have values smaller than 0?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 23.03125, "loss": 3.481102705001831 }, { "id": "identity_153673", "image": "ScienceQA/images/11021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Utah" } ], "clip_score": 32.9375, "loss": 3.0057640075683594 }, { "id": "identity_98010", "image": "TabMWP/images/28795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of tickets sold in the past 7 days?" }, { "from": "gpt", "value": "The answer is 83" } ], "clip_score": 30.421875, "loss": 2.5722999572753906 }, { "id": "identity_3147", "image": "UniGeo/images/3124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直尺的一条边经过一个直角顶点,直尺的另一条边与直角的一边相交,若∠1=30°,则∠2的度数是()\nChoices:\n(A) 30°\n(B) 45°\n(C) 60°\n(D) 120°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.4375, "loss": 3.0000081062316895 }, { "id": "identity_216181", "image": "VQA2.0/images/490959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the bike's tire?" }, { "from": "gpt", "value": "The answer is Knobby" } ], "clip_score": 23.421875, "loss": 2.7724361419677734 }, { "id": "identity_150230", "image": "DocVQA/images/jlbx0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of sales for the years 1974 and 1973 combined?" }, { "from": "gpt", "value": "The answer is $400,044,725" } ], "clip_score": 26.90625, "loss": 2.1572299003601074 }, { "id": "identity_139399", "image": "MapQA/images/map_8903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have more than 337 musicians?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.03125, "loss": 2.330414056777954 }, { "id": "identity_34291", "image": "A-OKVQA/images/367969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the woman likely returning home from?\nChoices:\n(A) mall\n(B) drug store\n(C) work\n(D) restaurant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.078125, "loss": 4.169649124145508 }, { "id": "identity_331245", "image": "DocVQA/images/pqhw0004_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is this document dated?" }, { "from": "gpt", "value": "The answer is April 1995" } ], "clip_score": 24.8125, "loss": 2.275435209274292 }, { "id": "identity_191562", "image": "PMC-VQA/images/PMC8063182_fig0001_423112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence is shown in the image?" }, { "from": "gpt", "value": "The answer is T2-weighted MRI sequence" } ], "clip_score": 30.640625, "loss": 1.7302069664001465 }, { "id": "identity_99602", "image": "IconQA/images/20131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents a stick, how much of the stick is left after cutting off 3 cm from it?" }, { "from": "gpt", "value": "The answer is 3 cm" } ], "clip_score": 31.40625, "loss": 2.267868757247925 }, { "id": "identity_14260", "image": "TQA/images/question_images/cycle_nitrogen_6713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does ammonia go in this diagram?\nChoices:\n(A) To nitrates in the soil\n(B) Denitrifying bacteria\n(C) Atmorpheric fixation\n(D) Industrial fixation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.3125, "loss": 3.6833572387695312 }, { "id": "identity_47872", "image": "PlotQA/images/21690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are plotted on the graph?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.734375, "loss": 2.5168001651763916 }, { "id": "identity_210900", "image": "PMC-VQA/images/PMC5978832_Fig2_321894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there symmetry of the patient's brain hemispheres?" }, { "from": "gpt", "value": "The answer is Yes, there is symmetry" } ], "clip_score": 26.90625, "loss": 2.5734527111053467 }, { "id": "identity_236596", "image": "UniGeo/images/1178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,直径AD与BC相交于点E,连接CD,若⊙O的半径为5,AB=AC=8,求△ABC的面积。" }, { "from": "gpt", "value": "The answer is 25.6 square units" } ], "clip_score": 28.3125, "loss": 1.8797260522842407 }, { "id": "identity_8457", "image": "AI2D/images/3066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents the golgi apparatus ?\nChoices:\n(A) B\n(B) J\n(C) S\n(D) Q" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.28125, "loss": 3.980195999145508 }, { "id": "identity_189347", "image": "TextVQA/images/16797cca37f387bb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the team on the right side of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.3125, "loss": 2.90531849861145 }, { "id": "identity_195956", "image": "VQA2.0/images/46024.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are on the desk?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.6875, "loss": 4.818236351013184 }, { "id": "identity_172924", "image": "CLEVR-Math/images/CLEVR_train_044950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the purple sphere?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.125, "loss": 2.7120277881622314 }, { "id": "identity_72860", "image": "TQA/images/abc_question_images/human_system_ear_16099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the ear contains the ossicles, as indicated by the label?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.75, "loss": 2.5575313568115234 }, { "id": "identity_175230", "image": "TabMWP/images/36098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day was the highest number of watermelons harvested?" }, { "from": "gpt", "value": "The answer is Wednesday" } ], "clip_score": 35.78125, "loss": 2.3202457427978516 }, { "id": "identity_288927", "image": "GeoQA+/images/1734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, angle ACB is 90 degrees, AB is 2. BC is extended to point D such that BD is 3 times CD. If E is the midpoint of AC, what is the length of DE?\nChoices:\n(A) 1.5\n(B) √{2}\n(C) 1.5\n(D) 1" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 38.09375, "loss": 2.544842481613159 }, { "id": "identity_205130", "image": "TextVQA/images/0204574848b0bf05.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is taking place?" }, { "from": "gpt", "value": "The answer is A protest or rally" } ], "clip_score": 20.3125, "loss": 2.8537325859069824 }, { "id": "identity_256783", "image": "Super-CLEVR/images/superCLEVR_new_000352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are there in total on all the objects in the image?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 25.78125, "loss": 1.7952921390533447 }, { "id": "identity_5577", "image": "PMC-VQA/images/PMC4656230_Fig2_446575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the right globe is the mass located? \nChoices:\n(A) Anterior segment \n(B) Posterior segment \n(C) Lateral segment \n(D) Medial segment " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.96875, "loss": 4.463801860809326 }, { "id": "identity_298727", "image": "DVQA/images/bar_train_00036775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total accuracy when combining the accuracies of the algorithms jazz and ride?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 35.1875, "loss": 2.4349048137664795 }, { "id": "identity_44297", "image": "FigureQA/images/87926.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value for Dark Violet?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 22.546875, "loss": 2.605825662612915 }, { "id": "identity_153121", "image": "AI2D/images/4158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do sedimentary rocks contribute to the formation of metamorphic rocks?" }, { "from": "gpt", "value": "The answer is Heat and Pressure" } ], "clip_score": 25.890625, "loss": 2.379594326019287 }, { "id": "identity_292295", "image": "A-OKVQA/images/373988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the formation of a line by the building?\nChoices:\n(A) for fun\n(B) its mandatory\n(C) its popular\n(D) its raining" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.7421875, "loss": 4.217713356018066 }, { "id": "identity_308262", "image": "PMC-VQA/images/PMC7678266_Fig1_302022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which layer did the ultrasound detect this area?\nChoices:\n(A) First layer (mucosa)\n(B) Second layer (submucosa)\n(C) Third layer (muscularis propria)\n(D) Fourth layer (subserosa)" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.828125, "loss": 3.756187915802002 }, { "id": "identity_129975", "image": "PMC-VQA/images/PMC7251435_fig3_178850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the sulci and gyri well-defined in this image?" }, { "from": "gpt", "value": "The answer is Yes, they are well-defined" } ], "clip_score": 25.828125, "loss": 2.337597131729126 }, { "id": "identity_141815", "image": "DVQA/images/bar_train_00152935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between the \"desk\" and \"fiber\" stores for the item \"melody\"?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.09375, "loss": 2.5197062492370605 }, { "id": "identity_81185", "image": "UniGeo/images/2480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of the smaller circle is 3cm and the radius of the larger circle is 5cm, what is the circumference of the larger circle?" }, { "from": "gpt", "value": "The answer is 10π cm" } ], "clip_score": 34.84375, "loss": 1.4569741487503052 }, { "id": "identity_224391", "image": "TabMWP/images/32904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering Estelle's requirements, calculate the total monthly cost for using each bank, including the checking account services and nonbank ATM transactions. Which bank offers the lower total monthly cost for Estelle?" }, { "from": "gpt", "value": "The answer is East Island Bank" } ], "clip_score": 30.90625, "loss": 2.587955951690674 }, { "id": "identity_135335", "image": "DocVQA/images/jtjk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the recipient of the letter?" }, { "from": "gpt", "value": "The answer is 716 Metrobank Building, Denver, Colorado 80202" } ], "clip_score": 21.671875, "loss": 2.301225423812866 }, { "id": "identity_105688", "image": "VQA2.0/images/338528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the banner above the clock?" }, { "from": "gpt", "value": "The answer is TO THE AVENUE APARTMENT" } ], "clip_score": 24.859375, "loss": 3.9686849117279053 }, { "id": "identity_157928", "image": "AI2D/images/4186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is represented by the arrow pointing from Metamorphic Rock to Magma?" }, { "from": "gpt", "value": "The answer is Melting" } ], "clip_score": 26.15625, "loss": 2.7744531631469727 }, { "id": "identity_293317", "image": "PMC-VQA/images/PMC5709000_F5_246781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the body is the mass located?\nChoices:\n(A) left uterine adnexa\n(B) pelvis\n(C) right uterine adnexa\n(D) right kidney" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.078125, "loss": 4.920085430145264 }, { "id": "identity_119982", "image": "VizWiz/images/VizWiz_train_00023749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the laptop displayed under \"Level 1\"?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.5625, "loss": 3.4576354026794434 }, { "id": "identity_67823", "image": "AI2D/images/1995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of plants in this food web?" }, { "from": "gpt", "value": "The answer is Source of food for Nematodes, Fungi, and Arthropods" } ], "clip_score": 34.5625, "loss": 2.0767247676849365 }, { "id": "identity_173032", "image": "IconQA/images/62906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 3 shapes, how many sides will remain in total?" }, { "from": "gpt", "value": "The answer is 72" } ], "clip_score": 29.5, "loss": 2.2420544624328613 }, { "id": "identity_241427", "image": "DocVQA/images/gsbg0072_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total termination fee if a contract is terminated on 08/31/88, 08/31/89, and 08/31/90?" }, { "from": "gpt", "value": "The answer is 2,400,000" } ], "clip_score": 27.1875, "loss": 1.3030141592025757 }, { "id": "identity_220999", "image": "A-OKVQA/images/180824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the financial institution on the building in the background?" }, { "from": "gpt", "value": "The answer is Bank of Montreal" } ], "clip_score": 15.2578125, "loss": 3.0211474895477295 }, { "id": "identity_275511", "image": "DocVQA/images/sqdl0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall cost?" }, { "from": "gpt", "value": "The answer is $106,871" } ], "clip_score": 22.6875, "loss": 2.63332200050354 }, { "id": "identity_190085", "image": "A-OKVQA/images/506508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is being used in the image?" }, { "from": "gpt", "value": "The answer is Horse-drawn cart" } ], "clip_score": 31.171875, "loss": 2.288851499557495 }, { "id": "identity_143711", "image": "DocVQA/images/hjpn0000_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the result of the transmission?" }, { "from": "gpt", "value": "The answer is OK" } ], "clip_score": 20.9375, "loss": 4.513894081115723 }, { "id": "identity_32907", "image": "IconQA/images/10931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.46875, "loss": 3.9542832374572754 }, { "id": "identity_51260", "image": "FigureQA/images/71840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Turquoise intersect with Medium Orchid at any point on the graph?" }, { "from": "gpt", "value": "The answer is Yes, they intersect" } ], "clip_score": 23.890625, "loss": 2.985170841217041 }, { "id": "identity_327665", "image": "MapQA/images/map_367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the first symbol in the legend represent the smallest range?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 7.75, "loss": 3.288097858428955 }, { "id": "identity_37177", "image": "TextVQA/images/294ddfe19f5063a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what car brand is the sponsor?" }, { "from": "gpt", "value": "The answer is rolls royce" } ], "clip_score": 15.2890625, "loss": 2.290813446044922 }, { "id": "identity_31276", "image": "VQA2.0/images/174876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many women are in this room?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.1875, "loss": 2.287989854812622 }, { "id": "identity_37057", "image": "A-OKVQA/images/573349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is being filmed here?\nChoices:\n(A) protest\n(B) cake walk\n(C) horse show\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.421875, "loss": 4.077432632446289 }, { "id": "identity_166410", "image": "IconQA/images/48932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the entire circle represents 100 points, how many points does the blue segment represent?" }, { "from": "gpt", "value": "The answer is 25 points" } ], "clip_score": 32.09375, "loss": 1.9715745449066162 }, { "id": "identity_39770", "image": "ChartQA/images/two_col_42769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bar with the lowest value?" }, { "from": "gpt", "value": "The answer is Ruffhouse" } ], "clip_score": 20.515625, "loss": 2.933091402053833 }, { "id": "identity_46558", "image": "DocVQA/images/xplf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is the next meeting scheduled to start?" }, { "from": "gpt", "value": "The answer is 12:00 noon" } ], "clip_score": 14.734375, "loss": 1.6843897104263306 }, { "id": "identity_163847", "image": "GeoQA+/images/3241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle ABD?" }, { "from": "gpt", "value": "The answer is 24 cm" } ], "clip_score": 34.71875, "loss": 2.6653780937194824 }, { "id": "identity_241521", "image": "ScienceQA/images/7213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary chemical elements that make up magnetite, and how do these elements contribute to its magnetic properties?" }, { "from": "gpt", "value": "The answer is Iron and oxygen; electron exchange generates magnetism" } ], "clip_score": 24.203125, "loss": 4.032464027404785 }, { "id": "identity_249066", "image": "UniGeo/images/929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, A, B, C三点在⊙O上, 且∠ACB=50°, 求∠AOB和∠OBC的和是多少度?" }, { "from": "gpt", "value": "The answer is 140°" } ], "clip_score": 31.03125, "loss": 1.7591370344161987 }, { "id": "identity_193530", "image": "PMC-VQA/images/PMC4779085_Fig5_8741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of valve regurgitation in this echocardiography image?" }, { "from": "gpt", "value": "The answer is Requires analysis of color flow for reverse signal" } ], "clip_score": 29.625, "loss": 4.926999568939209 }, { "id": "identity_33912", "image": "CLEVR-Math/images/CLEVR_train_010317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all cubes. How many objects are left?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.125, "loss": 2.2500240802764893 }, { "id": "identity_161884", "image": "Geometry3K/images/1786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 31.0, "loss": 2.943694829940796 }, { "id": "identity_242215", "image": "Super-CLEVR/images/superCLEVR_new_000155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image share the same material as the large gray bicycle, and what are they?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 30.15625, "loss": 2.8986127376556396 }, { "id": "identity_4061", "image": "PMC-VQA/images/PMC6692992_ccr32252-fig-0001_23256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to visualize the intrapericardial hematoma?\nChoices:\n(A) MRI \n(B) X-ray \n(C) Computed tomography \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.96875, "loss": 3.6237094402313232 }, { "id": "identity_196611", "image": "PMC-VQA/images/PMC6919442_F1_86269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of pleural effusion in this image?" }, { "from": "gpt", "value": "The answer is No, there is no evidence of pleural effusion" } ], "clip_score": 32.78125, "loss": 1.5185599327087402 }, { "id": "identity_221677", "image": "A-OKVQA/images/75668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is prohibited from entering as indicated by the traffic sign?" }, { "from": "gpt", "value": "The answer is Motorcycles" } ], "clip_score": 20.484375, "loss": 2.1527915000915527 }, { "id": "identity_210434", "image": "VQA2.0/images/491430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the tennis racket?" }, { "from": "gpt", "value": "The answer is Prince" } ], "clip_score": 26.8125, "loss": 3.565648317337036 }, { "id": "identity_152890", "image": "ScienceQA/images/12199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the magnets in relation to each other?" }, { "from": "gpt", "value": "The answer is Opposite poles facing each other" } ], "clip_score": 25.9375, "loss": 2.0909886360168457 }, { "id": "identity_174533", "image": "TabMWP/images/37588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern in the increase of the number of purple beads from one bag to the next?" }, { "from": "gpt", "value": "The answer is Add 5 beads for each additional bag" } ], "clip_score": 40.9375, "loss": 2.6537320613861084 }, { "id": "identity_120725", "image": "PMC-VQA/images/PMC5874961_acm20315-fig-0012_295162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.375, "loss": 3.2840957641601562 }, { "id": "identity_259016", "image": "VQA2.0/images/494066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the job titles of all individuals in the dugout, including the man on the far right" }, { "from": "gpt", "value": "The answer is Players and coaches" } ], "clip_score": 26.515625, "loss": 3.0498344898223877 }, { "id": "identity_163080", "image": "GeoQA+/images/3604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is a tangent to circle O at point B, and BC is the diameter of circle O, if angle A is 20 degrees, what is the measure of angle BOC?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 35.5625, "loss": 1.641376256942749 }, { "id": "identity_100833", "image": "IconQA/images/64610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in the image excluding the cactus?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 32.71875, "loss": 2.228203058242798 }, { "id": "identity_188101", "image": "PMC-VQA/images/PMC6214163_Fig1_390456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the left lung in the image?" }, { "from": "gpt", "value": "The answer is Left lung appears normal" } ], "clip_score": 29.609375, "loss": 4.434863090515137 }, { "id": "identity_188418", "image": "PMC-VQA/images/PMC4592368_Fig1_429405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging is shown in the picture?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.953125, "loss": 2.611900568008423 }, { "id": "identity_238732", "image": "FigureQA/images/60047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average y-axis value of Rosy Brown and Sienna combined?" }, { "from": "gpt", "value": "The answer is 85" } ], "clip_score": 26.703125, "loss": 2.323110342025757 }, { "id": "identity_254379", "image": "IconQA/images/48816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the volume of the shape if its height is 10 units and the radius of its base is 3 units?" }, { "from": "gpt", "value": "The answer is \\( 90\\pi \\) cubic units" } ], "clip_score": 28.8125, "loss": 2.71124267578125 }, { "id": "identity_237248", "image": "TQA/images/question_images/dna_6767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique hydrogen bonds are formed between the nucleobase pairings in DNA?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 26.828125, "loss": 2.097306966781616 }, { "id": "identity_71328", "image": "ScienceQA/images/14391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tubes are connected to the top of the bioreactor?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.078125, "loss": 4.179014205932617 }, { "id": "identity_64049", "image": "TQA/images/question_images/atomic_mass_number_9002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of neutrons between Cl and Mg?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 25.203125, "loss": 2.426462411880493 }, { "id": "identity_71150", "image": "AI2D/images/3724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the organ that stores bile before it is released into the small intestine?" }, { "from": "gpt", "value": "The answer is Gall-bladder" } ], "clip_score": 30.03125, "loss": 2.16262149810791 }, { "id": "identity_29268", "image": "TQA/images/question_images/earth_moon_phases_6005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are those portions of the electromagnetic radiation given off by the Sun?\nChoices:\n(A) Eclipse\n(B) Radition\n(C) Sun's Rays\n(D) Gravity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 3.8411688804626465 }, { "id": "identity_176118", "image": "TabMWP/images/34535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can Anita afford to buy both a map of the world and a cat calendar with her $11?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.546875, "loss": 2.7992665767669678 }, { "id": "identity_256622", "image": "PMC-VQA/images/PMC3245499_fg2_120079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of a T1-weighted MRI that make it suitable for imaging tuberculous granulomas in the basal cisterna, and how do these features compare to those of T2-weighted and Diffusion-weighted MRIs?" }, { "from": "gpt", "value": "The answer is T1-weighted MRI provides high anatomical detail suitable for identifying tuberculous granulomas" } ], "clip_score": 30.765625, "loss": 1.8701227903366089 }, { "id": "identity_15901", "image": "VizWiz/images/VizWiz_train_00018331.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What temperature do I have this toaster oven set on?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 32.0625, "loss": 3.6558594703674316 }, { "id": "identity_97391", "image": "IconQA/images/94192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares make up the letter 'L' before transformation?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.734375, "loss": 2.566415548324585 }, { "id": "identity_286639", "image": "TabMWP/images/2892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students in Ms. Peter's design class have more than one toy car to contribute towards completing their model of a futuristic highway?" }, { "from": "gpt", "value": "The answer is 46" } ], "clip_score": 28.546875, "loss": 2.787369966506958 }, { "id": "identity_202337", "image": "VQA2.0/images/165105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bird doing in the water?" }, { "from": "gpt", "value": "The answer is Bathing" } ], "clip_score": 23.625, "loss": 3.3046157360076904 }, { "id": "identity_259210", "image": "TextVQA/images/001bebecea382500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the website URL and the phone number listed on the sign beside the black and yellow car?" }, { "from": "gpt", "value": "The answer is www.classicmotorevents.co.uk, 01527 831726" } ], "clip_score": 28.1875, "loss": 1.5343948602676392 }, { "id": "identity_228129", "image": "Geometry3K/images/1330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the values of x and y so that the quadrilateral is a parallelogram" }, { "from": "gpt", "value": "The answer is \\(x = 11\\) and \\(y = 7\\)" } ], "clip_score": 31.5, "loss": 1.8602689504623413 }, { "id": "identity_257993", "image": "DocVQA/images/pxxm0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the organization where William J. Darby holds the designation of President?" }, { "from": "gpt", "value": "The answer is 489 Fifth Avenue, New York, N.Y. 10017" } ], "clip_score": 26.09375, "loss": 1.1704168319702148 }, { "id": "identity_329487", "image": "MapQA/images/map_3651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this legend a continuous bar?\nIs the legend here a continuous bar?\nIs the legend shown a continuous bar?\nIs the legend in the image a continuous bar?\nIs the legend displayed a continuous bar?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 13.265625, "loss": 2.925469160079956 }, { "id": "identity_59283", "image": "ChartQA/images/multi_col_60804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times did Publishing of journals and periodicals exceed 2,000 in value from 2008-2015?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.53125, "loss": 2.6565754413604736 }, { "id": "identity_138335", "image": "MapQA/images/map_3465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of stocks in the state of Texas?" }, { "from": "gpt", "value": "The answer is 280493-381200" } ], "clip_score": 34.34375, "loss": 2.4075326919555664 }, { "id": "identity_99800", "image": "TabMWP/images/20560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy one of each type of desktop and laptop, how much would you spend in total?" }, { "from": "gpt", "value": "The answer is $29,346" } ], "clip_score": 35.96875, "loss": 2.156020164489746 }, { "id": "identity_204033", "image": "A-OKVQA/images/241728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the food stand vendor's shirt?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 24.78125, "loss": 3.8685402870178223 }, { "id": "identity_50044", "image": "DVQA/images/bar_train_00080022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy for the 'jet' algorithm?" }, { "from": "gpt", "value": "The answer is milk" } ], "clip_score": 34.125, "loss": 3.007321834564209 }, { "id": "identity_242558", "image": "IconQA/images/43298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each paper clip is 2 cm long, what is the total length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 27.796875, "loss": 2.0520355701446533 }, { "id": "identity_273432", "image": "ScienceQA/images/15504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) bumpy\n(B) slippery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.28125, "loss": 4.056185722351074 }, { "id": "identity_110746", "image": "TextVQA/images/a3ce7908f1edb464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main activity taking place in this image?" }, { "from": "gpt", "value": "The answer is Playing Ultimate Frisbee" } ], "clip_score": 28.421875, "loss": 2.3326730728149414 }, { "id": "identity_294756", "image": "IconQA/images/1485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on? \nOptions:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.125, "loss": 4.527245998382568 }, { "id": "identity_314480", "image": "FigureQA/images/42731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0625, "loss": 3.931614398956299 }, { "id": "identity_280624", "image": "Geometry3K/images/1946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 2.\nOptions:\n(A) 20\n(B) 40\n(C) 45\n(D) 65" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.921875, "loss": 3.149106740951538 }, { "id": "identity_235010", "image": "Geometry3K/images/1117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the measure of \\(\\angle R\\)" }, { "from": "gpt", "value": "The answer is 150 degrees" } ], "clip_score": 32.28125, "loss": 2.956451892852783 }, { "id": "identity_191465", "image": "A-OKVQA/images/124102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather condition can be inferred from the image?" }, { "from": "gpt", "value": "The answer is Cloudy" } ], "clip_score": 17.75, "loss": 3.0480222702026367 }, { "id": "identity_59476", "image": "DocVQA/images/ntnk0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of The Nutrition Foundation, Inc.?" }, { "from": "gpt", "value": "The answer is 489 Fifth Avenue New York, New York 10017" } ], "clip_score": 23.28125, "loss": 1.2410444021224976 }, { "id": "identity_255902", "image": "TabMWP/images/25939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total cost difference between containers of honeydews and watermelons over the two years, 1996 and 1997?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 33.3125, "loss": 3.347374677658081 }, { "id": "identity_174245", "image": "TabMWP/images/9962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of broken crayons per box?" }, { "from": "gpt", "value": "The answer is 39 broken crayons" } ], "clip_score": 33.59375, "loss": 2.025892496109009 }, { "id": "identity_36258", "image": "IconQA/images/3534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is shown?" }, { "from": "gpt", "value": "The answer is 3,150" } ], "clip_score": 22.875, "loss": 2.4690372943878174 }, { "id": "identity_248051", "image": "A-OKVQA/images/148668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many reflective elements are visible on the bike, and where are they located?" }, { "from": "gpt", "value": "The answer is Four reflective elements, located on both wheels" } ], "clip_score": 19.59375, "loss": 3.099144220352173 }, { "id": "identity_52111", "image": "PlotQA/images/18457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average CPIA trade rating for Malawi over the years shown?" }, { "from": "gpt", "value": "The answer is 3.125" } ], "clip_score": 23.5, "loss": 2.132735013961792 }, { "id": "identity_67710", "image": "TQA/images/abc_question_images/parts_ocean_floor_17240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature is indicated by label C?" }, { "from": "gpt", "value": "The answer is Coastal mountain range" } ], "clip_score": 35.40625, "loss": 3.2439486980438232 }, { "id": "identity_131801", "image": "PMC-VQA/images/PMC9376875_f2_372531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain in the leftmost image?" }, { "from": "gpt", "value": "The answer is The leftmost image shows a lateral view of the brain" } ], "clip_score": 30.453125, "loss": 2.203090190887451 }, { "id": "identity_119839", "image": "TextVQA/images/d9e27054a3b1badf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the opus number of the composition?" }, { "from": "gpt", "value": "The answer is 803" } ], "clip_score": 13.0, "loss": 2.5078155994415283 }, { "id": "identity_218765", "image": "PMC-VQA/images/PMC4901062_F1_39732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text or lettering present in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 15.6328125, "loss": 3.154533624649048 }, { "id": "identity_133650", "image": "Super-CLEVR/images/superCLEVR_new_000931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the bicycle?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.03125, "loss": 1.7946964502334595 }, { "id": "identity_185459", "image": "PMC-VQA/images/PMC6551428_F000301_489637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is used to produce this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.75, "loss": 2.845426321029663 }, { "id": "identity_211872", "image": "TextVQA/images/03b10f45eb118bd5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What model is the mobile phone shown in the image?" }, { "from": "gpt", "value": "The answer is Treo 700wx" } ], "clip_score": 26.828125, "loss": 2.2837610244750977 }, { "id": "identity_338162", "image": "TextVQA/images/128f1fcef004e645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is served fresh every single day?" }, { "from": "gpt", "value": "The answer is manna" } ], "clip_score": 31.921875, "loss": 2.454525947570801 }, { "id": "identity_163466", "image": "GeoQA+/images/974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠BAC is 90 degrees in parallelogram ABCD, what is the measure of ∠ADC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 33.1875, "loss": 1.7415049076080322 }, { "id": "identity_243078", "image": "TextVQA/images/f1e5897dfd3cf5d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current date and time, and which network is this person on?" }, { "from": "gpt", "value": "The answer is Thursday 10, 9:54 PM, AT&T" } ], "clip_score": 15.046875, "loss": 1.7869218587875366 }, { "id": "identity_91862", "image": "TabMWP/images/18867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Diana wants to buy an equal amount of each type of applesauce, how many pounds can she buy with $50?" }, { "from": "gpt", "value": "The answer is 2 pounds" } ], "clip_score": 36.03125, "loss": 2.629211187362671 }, { "id": "identity_193417", "image": "TextVQA/images/c97ac1efbb3c6273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event is being advertised in the image?" }, { "from": "gpt", "value": "The answer is England vs USA Cape Town FIFA Fan Fest" } ], "clip_score": 24.1875, "loss": 2.780703544616699 }, { "id": "identity_295899", "image": "ScienceQA/images/5718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this plate?\nChoices:\n(A) paper\n(B) rock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.421875, "loss": 4.282247066497803 }, { "id": "identity_76842", "image": "AI2D/images/1348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many degrees does the Earth orbit around the sun from the Winter Solstice to the Vernal Equinox?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.53125, "loss": 1.7551250457763672 }, { "id": "identity_65120", "image": "AI2D/images/1786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is at the top of the food chain in this marine ecosystem?" }, { "from": "gpt", "value": "The answer is Royal Albatross" } ], "clip_score": 30.375, "loss": 1.77366304397583 }, { "id": "identity_147569", "image": "DocVQA/images/slyc0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number for the table titled \"Electrophoretic Versus Chemical Fractionation of the Serum Proteins, Burma\"?" }, { "from": "gpt", "value": "The answer is 163" } ], "clip_score": 16.640625, "loss": 2.566950559616089 }, { "id": "identity_320107", "image": "Geometry3K/images/910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Each pair of shapes is similar. Find y.\nChoices:\n(A) 12\n(B) 14\n(C) 20\n(D) 88" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.6875, "loss": 2.4228954315185547 }, { "id": "identity_332697", "image": "TabMWP/images/11621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much cheese was consumed per person in Argentina in 2006?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 32.4375, "loss": 2.1018173694610596 }, { "id": "identity_285218", "image": "TextVQA/images/e82caacfc32ba9e9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company's name is indicated on the cargo label?" }, { "from": "gpt", "value": "The answer is schenker" } ], "clip_score": 29.375, "loss": 3.2554376125335693 }, { "id": "identity_293638", "image": "A-OKVQA/images/210241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following infrastructure is essential for the transportation system to operate in this area: roads, train tracks, taxis, or airports?\nChoices:\n(A) roads\n(B) train tracks\n(C) taxis\n(D) airports" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.90625, "loss": 4.393288612365723 }, { "id": "identity_229983", "image": "VizWiz/images/VizWiz_train_00010428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key identifying features of a poppy flower, and how do they compare to the features of the flower in the image?" }, { "from": "gpt", "value": "The answer is The flower in the image is not a poppy" } ], "clip_score": 20.15625, "loss": 2.6777634620666504 }, { "id": "identity_329522", "image": "A-OKVQA/images/290034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the sum of each individual digit on the side of this boat?\nChoices:\n(A) 1530\n(B) 22\n(C) nine\n(D) 34" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0703125, "loss": 3.4279370307922363 }, { "id": "identity_259396", "image": "PMC-VQA/images/PMC3948506_Fig11_272836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which muscles shown in the MRI are part of the hamstring group, and which muscle is not part of this group?" }, { "from": "gpt", "value": "The answer is Gluteus maximus is not part of the hamstring group" } ], "clip_score": 31.796875, "loss": 1.7916971445083618 }, { "id": "identity_27524", "image": "DocVQA/images/ggjh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the manager of public service company ?" }, { "from": "gpt", "value": "The answer is William Dutch" } ], "clip_score": 19.109375, "loss": 4.307448863983154 }, { "id": "identity_161668", "image": "UniGeo/images/1450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point E is the intersection of the equilateral triangle ABC and circle O, what is the measure of angle AEC?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 35.375, "loss": 1.6948628425598145 }, { "id": "identity_89570", "image": "UniGeo/images/1842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that BC is half the length of AB, and DC is 3 cm, what is the length of segment BC?" }, { "from": "gpt", "value": "The answer is 3 cm" } ], "clip_score": 22.1875, "loss": 1.9623044729232788 }, { "id": "identity_278208", "image": "UniGeo/images/850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, point C lies on the circle O. If the central angle ∠ACB is 36°, what is the degree measure of the central angle ∠AOB?\nChoices:\n(A) 18°\n(B) 36°\n(C) 54°\n(D) 72°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.34375, "loss": 2.77790904045105 }, { "id": "identity_235791", "image": "FigureQA/images/42910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Rank all the colors from the largest to the smallest section in the pie chart" }, { "from": "gpt", "value": "The answer is Coral, Light Green, Blue, Light Slate, Medium Orchid" } ], "clip_score": 39.75, "loss": 1.3098689317703247 }, { "id": "identity_31890", "image": "TextVQA/images/91e4c8b4e33b7aad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: are these to machines to put coins in and out come different kinds of stickers?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 24.53125, "loss": 4.529354095458984 }, { "id": "identity_159412", "image": "Geometry3K/images/1050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line with the 78° angle is a transversal intersecting two parallel lines, what is the measure of the angle directly opposite to the 78° angle on the other parallel line?" }, { "from": "gpt", "value": "The answer is 78°" } ], "clip_score": 33.1875, "loss": 1.6592254638671875 }, { "id": "identity_4128", "image": "ScienceQA/images/8179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Brody's test show?\nChoices:\n(A) if the weather station would work when the temperature was 50°C\n(B) how well the weather station would work when it was windy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.765625, "loss": 4.113604545593262 }, { "id": "identity_263878", "image": "VQA2.0/images/277439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the couple engaged in?" }, { "from": "gpt", "value": "The answer is using mobile phone" } ], "clip_score": 26.859375, "loss": 3.209155321121216 }, { "id": "identity_326692", "image": "TextVQA/images/102be9f03369e1b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who brewed this beer?" }, { "from": "gpt", "value": "The answer is duclaw " } ], "clip_score": 23.59375, "loss": 5.294536113739014 }, { "id": "identity_233794", "image": "Super-CLEVR/images/superCLEVR_new_000587.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shiny things are there in total, and how many of them are on the right side of the small metallic motorbike?" }, { "from": "gpt", "value": "The answer is Total shiny things: 4, Shiny things on the right side: 2" } ], "clip_score": 27.53125, "loss": 1.4429588317871094 }, { "id": "identity_29057", "image": "PMC-VQA/images/PMC3849748_F3_248294.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the spine was stabilized with posterior stabilization and interbody fusion? \nChoices:\n(A) C5-C7 \n(B) Th11- L2 \n(C) Th 12/ L1 \n(D) lumbar spine " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 3.3377082347869873 }, { "id": "identity_223410", "image": "A-OKVQA/images/145447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the roof of the building on the left side of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.578125, "loss": 4.333189487457275 }, { "id": "identity_177277", "image": "TabMWP/images/8053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of schools in all towns combined?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 34.6875, "loss": 2.2168667316436768 }, { "id": "identity_19502", "image": "VQA2.0/images/73327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there someone in the plane?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.84375, "loss": 3.5707218647003174 }, { "id": "identity_124298", "image": "A-OKVQA/images/524255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color worn by the audience?" }, { "from": "gpt", "value": "The answer is This is subjective as the audience is wearing various colors, and no single color stands out predominantly" } ], "clip_score": 11.953125, "loss": 2.2448980808258057 }, { "id": "identity_117299", "image": "TextVQA/images/72726db5d909615a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the beaker made of?" }, { "from": "gpt", "value": "The answer is Glass" } ], "clip_score": 32.28125, "loss": 2.6497509479522705 }, { "id": "identity_222497", "image": "A-OKVQA/images/514800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper clips are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 16.609375, "loss": 2.4588115215301514 }, { "id": "identity_213878", "image": "PMC-VQA/images/PMC3457007_F5_156995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors are used to label different regions in section D?" }, { "from": "gpt", "value": "The answer is Red, green, and blue" } ], "clip_score": 19.0625, "loss": 2.4197189807891846 }, { "id": "identity_267636", "image": "VQA2.0/images/336310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the laptop powered on?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.546875, "loss": 3.8922371864318848 }, { "id": "identity_61630", "image": "DocVQA/images/xnkp0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the travel agency that issued this ticket?" }, { "from": "gpt", "value": "The answer is HEIGHTS TRAVEL SERVICE" } ], "clip_score": 20.90625, "loss": 2.8665597438812256 }, { "id": "identity_221316", "image": "VizWiz/images/VizWiz_train_00019201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What breed of horses are depicted on the mug?" }, { "from": "gpt", "value": "The answer is Clydesdales" } ], "clip_score": 24.03125, "loss": 1.9127799272537231 }, { "id": "identity_79684", "image": "UniGeo/images/1639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point C divides line segment AB in the ratio 3:2 and the length of segment AC is 18 cm, what is the length of segment CB?" }, { "from": "gpt", "value": "The answer is 12 cm" } ], "clip_score": 28.453125, "loss": 1.4727662801742554 }, { "id": "identity_243156", "image": "GeoQA+/images/2982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A,B,C在⊙O上,CO的延长线交AB于点D,∠A=50°,∠B=30°,求∠OBC的度数。" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 32.875, "loss": 1.6859511137008667 }, { "id": "identity_112994", "image": "A-OKVQA/images/412767.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of glass is next to the sandwich?" }, { "from": "gpt", "value": "The answer is A water glass" } ], "clip_score": 27.234375, "loss": 3.161642074584961 }, { "id": "identity_306607", "image": "Geometry3K/images/350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of y in the diagram.\nChoices:\n(A) 30\n(B) 35\n(C) 40\n(D) 45" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.015625, "loss": 2.836289882659912 }, { "id": "identity_281500", "image": "Super-CLEVR/images/superCLEVR_new_000832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the rubber object located behind the large blue bicycle?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 30.546875, "loss": 5.044633865356445 }, { "id": "identity_137303", "image": "ChartQA/images/two_col_41634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Hellenic Cables and Prysmian?" }, { "from": "gpt", "value": "The answer is 21%" } ], "clip_score": 32.09375, "loss": 2.1612954139709473 }, { "id": "identity_111666", "image": "VizWiz/images/VizWiz_train_00012211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tablecloth?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 21.546875, "loss": 3.0268352031707764 }, { "id": "identity_167306", "image": "TabMWP/images/2258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of tractors between the farmer with the most and the farmer with the least?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 37.5625, "loss": 2.4433600902557373 }, { "id": "identity_304002", "image": "TQA/images/question_images/human_system_digestive_3783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is waste formed?\nChoices:\n(A) Large intestine\n(B) Small intestine\n(C) Stomach\n(D) Appendix" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.625, "loss": 3.727221965789795 }, { "id": "identity_267667", "image": "ScienceQA/images/7178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) North Carolina\n(B) Pennsylvania\n(C) Connecticut\n(D) Alabama" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.234375, "loss": 4.2653703689575195 }, { "id": "identity_169140", "image": "TabMWP/images/33071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a race car ride cost compared to a jet ski ride?" }, { "from": "gpt", "value": "The answer is $244.00" } ], "clip_score": 33.375, "loss": 2.0172317028045654 }, { "id": "identity_76308", "image": "AI2D/images/1498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the process where water is absorbed into the ground?" }, { "from": "gpt", "value": "The answer is Percolation" } ], "clip_score": 26.515625, "loss": 2.805309772491455 }, { "id": "identity_202065", "image": "PMC-VQA/images/PMC6425693_F6_454105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the arrows pointing to specific areas in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 17.0625, "loss": 3.3062057495117188 }, { "id": "identity_132218", "image": "Super-CLEVR/images/superCLEVR_new_001564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have exactly three wheels?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.4375, "loss": 2.346111536026001 }, { "id": "identity_153347", "image": "ScienceQA/images/15081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 26.328125, "loss": 2.9029815196990967 }, { "id": "identity_148089", "image": "DocVQA/images/sxdg0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the options provided for the student's present scholastic record?" }, { "from": "gpt", "value": "The answer is satisfactory, unsatisfactory" } ], "clip_score": 23.390625, "loss": 2.92634916305542 }, { "id": "identity_195753", "image": "A-OKVQA/images/205762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any signage visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is a sign in the background" } ], "clip_score": 19.921875, "loss": 2.2446141242980957 }, { "id": "identity_215375", "image": "A-OKVQA/images/440032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing lanes are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.859375, "loss": 2.5401501655578613 }, { "id": "identity_258869", "image": "IconQA/images/64146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify and describe the type of symmetry (if any) present in the image, and explain why the image does or does not exhibit that symmetry" }, { "from": "gpt", "value": "The answer is The image does not exhibit reflectional symmetry because the two halves are not identical" } ], "clip_score": 17.203125, "loss": 2.52055025100708 }, { "id": "identity_19325", "image": "AI2D/images/13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A section or part between two nodes known as?\nChoices:\n(A) Internode\n(B) Petiole\n(C) Stem\n(D) Terminal" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.171875, "loss": 3.94978404045105 }, { "id": "identity_316692", "image": "Geometry3K/images/563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the scale factor from one shape to another.\nChoices:\n(A) 0.75\n(B) 1.25\n(C) 1.5\n(D) 1.75" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.28125, "loss": 3.1881299018859863 }, { "id": "identity_150808", "image": "ScienceQA/images/6446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the yellow particles represent a solute, which solution is more saturated?" }, { "from": "gpt", "value": "The answer is Solution B" } ], "clip_score": 31.21875, "loss": 2.4364168643951416 }, { "id": "identity_151760", "image": "ScienceQA/images/8517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean is located to the east of the highlighted country?" }, { "from": "gpt", "value": "The answer is Pacific Ocean" } ], "clip_score": 31.921875, "loss": 2.6560306549072266 }, { "id": "identity_204732", "image": "PMC-VQA/images/PMC3432878_fig2_152924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is labeled with the green arrow?" }, { "from": "gpt", "value": "The answer is Anterior cerebral artery" } ], "clip_score": 28.96875, "loss": 2.0397489070892334 }, { "id": "identity_283663", "image": "A-OKVQA/images/567287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the umbrellas being different colors?\nChoices:\n(A) privacy\n(B) random colors\n(C) being colorful\n(D) identification" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.10541296005249 }, { "id": "identity_248541", "image": "TabMWP/images/29200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of participants (children and parents) in all activities combined at Vivian's birthday party?" }, { "from": "gpt", "value": "The answer is 62" } ], "clip_score": 36.78125, "loss": 2.760338306427002 }, { "id": "identity_36131", "image": "A-OKVQA/images/484019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who speaks the same language that the sign is in?\nChoices:\n(A) charlotte vega\n(B) janet montgomery\n(C) roxane mesquida\n(D) sara paxton" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.359375, "loss": 3.7879791259765625 }, { "id": "identity_313983", "image": "VQA2.0/images/479219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When was this motorcycle introduced for sale?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 28.28125, "loss": 3.749143362045288 }, { "id": "identity_301020", "image": "FigureQA/images/66738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Peru the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.40207576751709 }, { "id": "identity_227112", "image": "PMC-VQA/images/PMC5880870_Fig2_296385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, which brain regions show increased rsFC in schizophrenia patients, and how does this compare to regions with decreased rsFC?" }, { "from": "gpt", "value": "The answer is Warm colors show increased rsFC in frontal and temporal lobes" } ], "clip_score": 33.375, "loss": 3.2719132900238037 }, { "id": "identity_118960", "image": "A-OKVQA/images/284355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of headwear is the man on the horse wearing?" }, { "from": "gpt", "value": "The answer is A cowboy hat" } ], "clip_score": 23.875, "loss": 2.5867321491241455 }, { "id": "identity_191745", "image": "PMC-VQA/images/PMC5126839_Fig3_100226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many segments are labeled on the left side of the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 16.890625, "loss": 2.4627997875213623 }, { "id": "identity_128835", "image": "PMC-VQA/images/PMC8858981_f1_198546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is highlighted by the pink color in the image?" }, { "from": "gpt", "value": "The answer is Spleen" } ], "clip_score": 29.375, "loss": 2.554220199584961 }, { "id": "identity_62136", "image": "DocVQA/images/xxyh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the document prepared for?" }, { "from": "gpt", "value": "The answer is Great Western Sugar Company" } ], "clip_score": 24.5, "loss": 2.511981964111328 }, { "id": "identity_276280", "image": "PMC-VQA/images/PMC7797455_FIG3_339777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the CT images, which lobes show more extensive effects?\nChoices:\n(A) Upper lobes\n(B) Middle lobes\n(C) Lower lobes\n(D) All lobes are equally affected" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.46875, "loss": 4.413824558258057 }, { "id": "identity_252436", "image": "TabMWP/images/4095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each sheet has 10 stickers, how many stickers are on 15 sheets?" }, { "from": "gpt", "value": "The answer is 150" } ], "clip_score": 33.90625, "loss": 1.5037040710449219 }, { "id": "identity_229262", "image": "UniGeo/images/289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,四个全等的直角三角形拼接成一个正方形,其中AE=10,BE=24,求正方形的边长。" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 24.953125, "loss": 2.748810052871704 }, { "id": "identity_209596", "image": "VQA2.0/images/150164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any reflections visible on the wet ground?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.203125, "loss": 2.9672775268554688 }, { "id": "identity_77600", "image": "AI2D/images/4413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which leaf type is characterized by having a tip with a small, stiff point?" }, { "from": "gpt", "value": "The answer is Bristle-tipped" } ], "clip_score": 33.21875, "loss": 1.436075210571289 }, { "id": "identity_216170", "image": "VizWiz/images/VizWiz_train_00007277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main theme represented on the cup?" }, { "from": "gpt", "value": "The answer is Space exploration" } ], "clip_score": 23.515625, "loss": 3.3475825786590576 }, { "id": "identity_2245", "image": "GeoQA+/images/2094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,一个直角三角形ABC的斜边AB与量角器的零刻度线重合,点D对应56°,则∠BCD的度数为()\nChoices:\n(A) 28°\n(B) 56°\n(C) 62°\n(D) 64°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.59375, "loss": 2.714658260345459 }, { "id": "identity_236638", "image": "UniGeo/images/1580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB∥CD,AD与BC相交于点P,AB=3,CD=6,AP=4,若DP的长为8,求AD的长。" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 28.75, "loss": 2.0266716480255127 }, { "id": "identity_177525", "image": "TabMWP/images/18250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cost per pound of all the cookie dough options listed?" }, { "from": "gpt", "value": "The answer is $4.67 per pound" } ], "clip_score": 33.46875, "loss": 1.690556526184082 }, { "id": "identity_199195", "image": "TextVQA/images/8001268e11863358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many novels are included in this issue of the comic book?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 23.359375, "loss": 3.3776512145996094 }, { "id": "identity_163767", "image": "GeoQA+/images/831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and chord CD is drawn, what is the perimeter of quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is approximately 27.66 units" } ], "clip_score": 35.21875, "loss": 2.4665770530700684 }, { "id": "identity_85337", "image": "UniGeo/images/2084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle DAE?" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 29.53125, "loss": 2.3458404541015625 }, { "id": "identity_50360", "image": "FigureQA/images/73832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Gray line ever intersect with the Firebrick line?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.796875, "loss": 3.2094032764434814 }, { "id": "identity_29372", "image": "VQA2.0/images/236593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the bike parked?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.234375, "loss": 4.122250556945801 }, { "id": "identity_18614", "image": "PMC-VQA/images/PMC9078666_fig2_274094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image showing? \nChoices:\n(A) Live silk worms \n(B) silk spheres \n(C) eTuSp1 Drug \n(D) Unknown substance " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 4.019740104675293 }, { "id": "identity_48", "image": "FigureQA/images/15047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.8125, "loss": 4.514103412628174 }, { "id": "identity_249761", "image": "PMC-VQA/images/PMC8085189_Fig1_431007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are visible in the CT scan, and how does their appearance confirm the patient’s position as prone?" }, { "from": "gpt", "value": "The answer is The heart, lungs, spine, and sternum confirm the prone position" } ], "clip_score": 32.59375, "loss": 3.110227346420288 }, { "id": "identity_298616", "image": "VQA2.0/images/12386.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mountains can be seen in the distance?" }, { "from": "gpt", "value": "The answer is several" } ], "clip_score": 18.96875, "loss": 3.7075276374816895 }, { "id": "identity_202372", "image": "A-OKVQA/images/399275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of crossing is depicted in the image?" }, { "from": "gpt", "value": "The answer is Zebra crossing" } ], "clip_score": 29.421875, "loss": 2.589282274246216 }, { "id": "identity_317219", "image": "A-OKVQA/images/521301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What other animal is this one most closely related to?\nChoices:\n(A) mollusk\n(B) donkey\n(C) badger\n(D) possum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.84375, "loss": 4.140840530395508 }, { "id": "identity_298180", "image": "DocVQA/images/jrxd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is being referred to?" }, { "from": "gpt", "value": "The answer is March 26, 1976" } ], "clip_score": 20.9375, "loss": 1.8003394603729248 }, { "id": "identity_274867", "image": "VQA-RAD/images/synpic34054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is the patient's condition?" }, { "from": "gpt", "value": "The answer is Blind loop syndrome" } ], "clip_score": 22.09375, "loss": 4.186637878417969 }, { "id": "identity_185544", "image": "VQA2.0/images/360771.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the woman's dress?" }, { "from": "gpt", "value": "The answer is Tropical leaf pattern" } ], "clip_score": 14.7578125, "loss": 3.0885941982269287 }, { "id": "identity_306789", "image": "ScienceQA/images/13044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Olympia\n(B) Tucson\n(C) Helena\n(D) Phoenix" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.84375, "loss": 3.9460182189941406 }, { "id": "identity_261544", "image": "PMC-VQA/images/PMC4047780_F3_295237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of radiograph is shown in image (B)?\nChoices:\n(A) Panoramic radiograph\n(B) CBCT\n(C) MRI\n(D) CT scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.03125, "loss": 4.451362609863281 }, { "id": "identity_133516", "image": "A-OKVQA/images/201596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats can be seen in the background?" }, { "from": "gpt", "value": "The answer is Four boats" } ], "clip_score": 23.25, "loss": 4.702709674835205 }, { "id": "identity_4334", "image": "A-OKVQA/images/34597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for the maneuver the skaters will do next?\nChoices:\n(A) ollie\n(B) manual\n(C) dropping in\n(D) kick flip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.203125, "loss": 4.342543125152588 }, { "id": "identity_321676", "image": "GeoQA+/images/1785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,∠ACB=40°,AC=BC,E为BC的中点,连接OE并延长交⊙O于点D,连接AD,则∠ADO的大小为多少?\nChoices:\n(A) 35°\n(B) 25°\n(C) 20°\n(D) 15°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.140625, "loss": 2.334824323654175 }, { "id": "identity_289515", "image": "DocVQA/images/rxhm0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the executive secretary?" }, { "from": "gpt", "value": "The answer is channing H. Lushbough" } ], "clip_score": 16.84375, "loss": 4.731597423553467 }, { "id": "identity_97897", "image": "CLEVR-Math/images/CLEVR_train_029830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cylinders are there in total?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.96875, "loss": 1.9500188827514648 }, { "id": "identity_246111", "image": "IconQA/images/97058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many birds are there in total, and what is the sum of the digits of this number?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 21.890625, "loss": 3.155839204788208 }, { "id": "identity_264834", "image": "IconQA/images/92836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the clock times shown, which friend watched the TV show earlier on Wednesday night, Jackson or Lola?\nChoices:\n(A) Jackson\n(B) Lola" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.234375, "loss": 3.330889940261841 }, { "id": "identity_198735", "image": "PMC-VQA/images/PMC4921494_F2_45002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in this image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.875, "loss": 3.29569935798645 }, { "id": "identity_12824", "image": "IconQA/images/83967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rectangles are there?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 29.1875, "loss": 1.943532109260559 }, { "id": "identity_214142", "image": "TextVQA/images/53230dc1ef1df3b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the iPhone's camera lens centered or off-centered?" }, { "from": "gpt", "value": "The answer is Off-centered" } ], "clip_score": 22.609375, "loss": 2.118899345397949 }, { "id": "identity_76313", "image": "AI2D/images/3525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'Golgi apparatus' refer to in the cell diagram?" }, { "from": "gpt", "value": "The answer is An organelle that processes and packages proteins and lipids" } ], "clip_score": 23.515625, "loss": 1.7400200366973877 }, { "id": "identity_47980", "image": "ChartQA/images/multi_col_1298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in revenue between Batman Begins and The Dark Knight in North America?" }, { "from": "gpt", "value": "The answer is 328.01 million U.S. dollars" } ], "clip_score": 27.734375, "loss": 1.8335282802581787 }, { "id": "identity_204589", "image": "PMC-VQA/images/PMC4247313_f2-etm-09-01-0117_340163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is predominantly visible in the upper left quadrant of the image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 28.109375, "loss": 3.2499451637268066 }, { "id": "identity_273453", "image": "A-OKVQA/images/243071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Of what material is the man's jacket composed?\nChoices:\n(A) plastic\n(B) leather\n(C) wool\n(D) denim" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.6796875, "loss": 4.4196977615356445 }, { "id": "identity_55481", "image": "FigureQA/images/60223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value for Medium Mint?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 22.421875, "loss": 2.7709128856658936 }, { "id": "identity_111071", "image": "VizWiz/images/VizWiz_train_00013282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Evening or overcast day" } ], "clip_score": 14.75, "loss": 3.1677329540252686 }, { "id": "identity_542", "image": "FigureQA/images/29112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5, "loss": 4.270163536071777 }, { "id": "identity_18664", "image": "AI2D/images/325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the given food chain, if all the giant kelp were destroyed, which population would suffer?\nChoices:\n(A) otters\n(B) large fishes\n(C) sea stars\n(D) sea urchins, snails and crabs" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.53125, "loss": 3.1892452239990234 }, { "id": "identity_146894", "image": "DocVQA/images/spmf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of Mr. Dick Ferris at United Airlines?" }, { "from": "gpt", "value": "The answer is President" } ], "clip_score": 23.40625, "loss": 3.1482341289520264 }, { "id": "identity_134190", "image": "ChartQA/images/5976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the third highest number of special immigrant visa holders?" }, { "from": "gpt", "value": "The answer is Virginia" } ], "clip_score": 31.15625, "loss": 3.1897480487823486 }, { "id": "identity_270228", "image": "DVQA/images/bar_train_00010752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are more people inclined towards choosing the object \"week\" in the category \"fly\" over the object \"case\" in the category \"site\"?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 32.0, "loss": 3.077291965484619 }, { "id": "identity_204749", "image": "VQA2.0/images/299836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fireplace is in the room?" }, { "from": "gpt", "value": "The answer is Wood-burning" } ], "clip_score": 22.484375, "loss": 3.030255079269409 }, { "id": "identity_92914", "image": "TabMWP/images/6867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median wait time for technical assistance?" }, { "from": "gpt", "value": "The answer is 67.5 minutes" } ], "clip_score": 35.8125, "loss": 2.5758821964263916 }, { "id": "identity_175403", "image": "TabMWP/images/38116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organizations planted more than 85 trees?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.375, "loss": 2.5576677322387695 }, { "id": "identity_92649", "image": "TabMWP/images/27254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the goal was to sell at least 200 newspapers each day, on which days was the goal not met?" }, { "from": "gpt", "value": "The answer is Wednesday and Thursday" } ], "clip_score": 33.0, "loss": 1.6485682725906372 }, { "id": "identity_7212", "image": "ScienceQA/images/12081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Virginia?\nChoices:\n(A) Tulsa\n(B) Bridgeport\n(C) Atlanta\n(D) Richmond" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.578125, "loss": 3.8959450721740723 }, { "id": "identity_61623", "image": "DocVQA/images/fmmp0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of securities received?" }, { "from": "gpt", "value": "The answer is $0" } ], "clip_score": 21.84375, "loss": 3.081399917602539 }, { "id": "identity_281669", "image": "IconQA/images/46089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of the shape that is colored purple? \n(A) 3/6\n(B) 2/8\n(C) 7/12\n(D) 3/5" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.40625, "loss": 3.162740468978882 }, { "id": "identity_184207", "image": "PMC-VQA/images/PMC7795176_polymers-13-00026-f004_338529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining method was used for this microscopy image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) staining" } ], "clip_score": 31.5625, "loss": 0.9017216563224792 }, { "id": "identity_122405", "image": "TextVQA/images/203d9cc8d63c0287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How often does the email mention a child dies from malaria?" }, { "from": "gpt", "value": "The answer is every 45 seconds" } ], "clip_score": 29.65625, "loss": 1.8839242458343506 }, { "id": "identity_119436", "image": "Super-CLEVR/images/superCLEVR_new_001207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the black car relative to the cyan bus?" }, { "from": "gpt", "value": "The answer is In front of" } ], "clip_score": 21.703125, "loss": 2.907456636428833 }, { "id": "identity_122844", "image": "PMC-VQA/images/PMC6909124_acm212773-fig-0001_82999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is located within the green contour?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 28.625, "loss": 3.5176239013671875 }, { "id": "identity_136746", "image": "DocVQA/images/mxxx0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the premium arrival for the promotion?" }, { "from": "gpt", "value": "The answer is March 24, 1997" } ], "clip_score": 25.75, "loss": 1.7200372219085693 }, { "id": "identity_15763", "image": "GeoQA+/images/745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,小明从A处出发沿北偏西30°方向行走至B处,又沿南偏西50°方向行走至C处,此时再沿与出发时一致的方向行走至D处,则∠BCD的度数为()\nChoices:\n(A) 100°\n(B) 80°\n(C) 50°\n(D) 20°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.59375, "loss": 2.4637885093688965 }, { "id": "identity_242153", "image": "DocVQA/images/qpvw0217_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the submission target date for the paper authored by F. Stanczyk, and what is the journal in which it is intended to be published?" }, { "from": "gpt", "value": "The answer is July 2002, American Journal of Obstetrics and Gynecology" } ], "clip_score": 24.859375, "loss": 1.8477212190628052 }, { "id": "identity_52578", "image": "ChartQA/images/8827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the orange bar for Italy?" }, { "from": "gpt", "value": "The answer is 44" } ], "clip_score": 17.828125, "loss": 2.7595858573913574 }, { "id": "identity_139393", "image": "DVQA/images/bar_train_00015908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of people who prefer the category 'ground' across all objects?" }, { "from": "gpt", "value": "The answer is 330%" } ], "clip_score": 24.9375, "loss": 2.4546141624450684 }, { "id": "identity_24177", "image": "VQA2.0/images/3602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the board?" }, { "from": "gpt", "value": "The answer is westerville police division" } ], "clip_score": 32.125, "loss": 2.4592180252075195 }, { "id": "identity_291014", "image": "FigureQA/images/37559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gray not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.1875, "loss": 3.6320958137512207 }, { "id": "identity_29819", "image": "TabMWP/images/35679.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Liz bought 4 kilograms of ricotta and 4 kilograms of Swiss. How much did she spend?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.28125, "loss": 3.3811848163604736 }, { "id": "identity_237023", "image": "ScienceQA/images/20631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the temperature of the heating pad (independent variable) affect the number of days until a seed germinated (dependent variable), and what is the average germination time for seeds at three different temperatures: 20°C, 25°C, and 30°C?" }, { "from": "gpt", "value": "The answer is Average germination times are 8, 6, and 5 days respectively" } ], "clip_score": 24.15625, "loss": 2.5710537433624268 }, { "id": "identity_290088", "image": "PMC-VQA/images/PMC8035965_FIG2_413436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body was scanned in the CT scan: head, neck, abdomen, or legs?\nChoices:\n(A) Head \n(B) Neck \n(C) Abdomen \n(D) Legs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.578125, "loss": 4.138159275054932 }, { "id": "identity_252815", "image": "DVQA/images/bar_train_00093952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Rank the algorithms from highest to lowest accuracy and determine the difference in accuracy between the highest and lowest algorithms" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 38.59375, "loss": 3.8128902912139893 }, { "id": "identity_174750", "image": "TabMWP/images/21449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of goldfish per aquarium?" }, { "from": "gpt", "value": "The answer is 12.5" } ], "clip_score": 35.96875, "loss": 1.844494104385376 }, { "id": "identity_238715", "image": "TabMWP/images/23704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average rate of change in the number of rounds per year from 2008 to 2012?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 25.9375, "loss": 2.675783157348633 }, { "id": "identity_183768", "image": "PMC-VQA/images/PMC6689493_FIG1_22340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What plane is this CT scan image taken in?" }, { "from": "gpt", "value": "The answer is Axial or transverse plane" } ], "clip_score": 32.96875, "loss": 2.704023599624634 }, { "id": "identity_25503", "image": "Geometry3K/images/1268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the measure of \\angle 7.\nChoices:\n(A) 70\n(B) 110\n(C) 140\n(D) 220" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.5625, "loss": 2.998633861541748 }, { "id": "identity_69439", "image": "AI2D/images/3267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Centrioles\"?" }, { "from": "gpt", "value": "The answer is Centrioles help in cell division by forming spindle fibers" } ], "clip_score": 27.015625, "loss": 1.6174801588058472 }, { "id": "identity_105712", "image": "A-OKVQA/images/11968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is shown in the image?" }, { "from": "gpt", "value": "The answer is Horse" } ], "clip_score": 23.328125, "loss": 3.030658483505249 }, { "id": "identity_171849", "image": "TabMWP/images/31779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the greatest decrease in winning time from one year to the next?" }, { "from": "gpt", "value": "The answer is 7 minutes (from 2011 to 2012)" } ], "clip_score": 31.484375, "loss": 1.1085870265960693 }, { "id": "identity_150263", "image": "DocVQA/images/tjgv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is suggested to be the moderator for the symposium?" }, { "from": "gpt", "value": "The answer is Dr. Philip L. White" } ], "clip_score": 20.265625, "loss": 2.8241610527038574 }, { "id": "identity_122204", "image": "A-OKVQA/images/538057.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest tent?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.984375, "loss": 3.6673171520233154 }, { "id": "identity_245201", "image": "TabMWP/images/6453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine the equation of the function represented by the table and verify if it is linear or nonlinear" }, { "from": "gpt", "value": "The answer is The function is linear with the equation \\( y = -5x + 37 \\)" } ], "clip_score": 24.03125, "loss": 2.7121880054473877 }, { "id": "identity_321818", "image": "TQA/images/abc_question_images/parts_worm_17299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the spermathecal aperture of this organism?\nChoices:\n(A) N\n(B) M\n(C) S\n(D) V" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.21875, "loss": 3.7835891246795654 }, { "id": "identity_108967", "image": "VQA-RAD/images/synpic30215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the image in pixels?" }, { "from": "gpt", "value": "The answer is 512x512 pixels" } ], "clip_score": 19.765625, "loss": 1.897583246231079 }, { "id": "identity_72495", "image": "TQA/images/abc_question_images/human_system_eye_12857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter \"L\" represent in this diagram of the eye?" }, { "from": "gpt", "value": "The answer is Lens" } ], "clip_score": 32.15625, "loss": 2.1276605129241943 }, { "id": "identity_78516", "image": "AI2D/images/3288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"Cristae\"?" }, { "from": "gpt", "value": "The answer is Increase surface area for ATP production" } ], "clip_score": 18.3125, "loss": 2.57388973236084 }, { "id": "identity_307436", "image": "Geometry3K/images/56.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: P is the incenter of a triangle. Determine the measure of a certain angle, \\( \\angle DAC \\).\n\nChoices:\n(A) 28.5\n(B) 30\n(C) 31\n(D) 33" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.21875, "loss": 2.933380365371704 }, { "id": "identity_155176", "image": "ScienceQA/images/21167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the tail in the organism shown in the image?" }, { "from": "gpt", "value": "The answer is To flip the body if turned upside down" } ], "clip_score": 19.359375, "loss": 3.3388381004333496 }, { "id": "identity_106419", "image": "TextVQA/images/5e2e7734a372dd79.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the backpack?" }, { "from": "gpt", "value": "The answer is Berghaus" } ], "clip_score": 31.34375, "loss": 3.3113601207733154 }, { "id": "identity_40798", "image": "FigureQA/images/74638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Bubblegum line consistently above the Dark Orchid line throughout the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.78125, "loss": 3.550778865814209 }, { "id": "identity_46671", "image": "DocVQA/images/fkph0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What factory is the report based on?" }, { "from": "gpt", "value": "The answer is Lovell Factory" } ], "clip_score": 23.421875, "loss": 4.716315269470215 }, { "id": "identity_70066", "image": "ScienceQA/images/15946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the typical habitat of the animal depicted in the image?" }, { "from": "gpt", "value": "The answer is Eucalyptus forests and woodlands" } ], "clip_score": 28.0, "loss": 1.8630179166793823 }, { "id": "identity_270588", "image": "UniGeo/images/2848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, AB and CD are chords of circle O, and AB is perpendicular to CD. If ∠CDB = 62°, what is the measure of ∠ACD?\nChoices:\n(A) 28°\n(B) 31°\n(C) 38°\n(D) 62°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 41.3125, "loss": 2.329044818878174 }, { "id": "identity_145558", "image": "PlotQA/images/45791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the debt percentage of China in 1986?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 21.140625, "loss": 1.8456553220748901 }, { "id": "identity_184589", "image": "A-OKVQA/images/114149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vegetation visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.5390625, "loss": 3.0561532974243164 }, { "id": "identity_196147", "image": "VQA-AS/images/6707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the picnic blanket?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 30.265625, "loss": 2.710409164428711 }, { "id": "identity_48403", "image": "DVQA/images/bar_train_00021722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the lowest accuracy on the 'fit' dataset?" }, { "from": "gpt", "value": "The answer is ground" } ], "clip_score": 33.65625, "loss": 3.002535343170166 }, { "id": "identity_257955", "image": "VQA2.0/images/364159.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many degrees does each slice of pizza cover if the pizza is cut into 8 equal slices?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 22.75, "loss": 1.410031795501709 }, { "id": "identity_286513", "image": "A-OKVQA/images/156685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is famous for selling fruit from boats, as depicted in the image? Is it:\n(A) Germany\n(B) India\n(C) China\n(D) Vietnam" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.265625, "loss": 3.6148428916931152 }, { "id": "identity_314978", "image": "VQA2.0/images/117349.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the knife handle?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 13.7265625, "loss": 3.788717746734619 }, { "id": "identity_154847", "image": "ScienceQA/images/4325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what is the likely weight range for this type of hammer?" }, { "from": "gpt", "value": "The answer is 16 to 20 ounces" } ], "clip_score": 28.984375, "loss": 1.7191790342330933 }, { "id": "identity_274856", "image": "UniGeo/images/1113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In quadrilateral ABCD inscribed in circle O, with E on the extension of BC, if ∠DCE = 70°, what is the measure of ∠BOD?\nChoices:\n(A) 100°\n(B) 110°\n(C) 140°\n(D) 70°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.46875, "loss": 2.417975664138794 }, { "id": "identity_4857", "image": "VQA-AS/images/12857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the boy's left hand?" }, { "from": "gpt", "value": "The answer is on chair" } ], "clip_score": 25.21875, "loss": 4.1061320304870605 }, { "id": "identity_158220", "image": "TQA/images/abc_question_images/parts_neuron_17234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part of the neuron is responsible for receiving signals?" }, { "from": "gpt", "value": "The answer is U" } ], "clip_score": 27.1875, "loss": 4.1199631690979 }, { "id": "identity_209625", "image": "VQA-AS/images/140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lady on the left wearing on her feet?" }, { "from": "gpt", "value": "The answer is Black shoes" } ], "clip_score": 17.921875, "loss": 2.8245301246643066 }, { "id": "identity_121665", "image": "VQA2.0/images/392780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest the game is being played?" }, { "from": "gpt", "value": "The answer is Evening or Night" } ], "clip_score": 14.734375, "loss": 3.0382585525512695 }, { "id": "identity_172288", "image": "IconQA/images/53675.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of squares occupied by the letter R?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 28.6875, "loss": 2.1443874835968018 }, { "id": "identity_96986", "image": "TabMWP/images/1985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pastries in 1, 2, and 3 tins combined?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 37.09375, "loss": 1.897649884223938 }, { "id": "identity_323306", "image": "IconQA/images/21262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use the ruler to find the length of the line to the nearest centimeter. The line is approximately (_) centimeters long." }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 34.0625, "loss": 2.3492438793182373 }, { "id": "identity_308569", "image": "Super-CLEVR/images/superCLEVR_new_000724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color of the large object behind the blue car the same as the matte vehicle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.703125, "loss": 5.3834309577941895 }, { "id": "identity_163139", "image": "UniGeo/images/2035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 30.125, "loss": 2.318967580795288 }, { "id": "identity_133495", "image": "TextVQA/images/1a1dc4f649c70562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of water is stacked on top of the refrigerated section?" }, { "from": "gpt", "value": "The answer is Poland Spring" } ], "clip_score": 27.0625, "loss": 2.9490935802459717 }, { "id": "identity_192074", "image": "PMC-VQA/images/PMC6843538_microorganisms-07-00397-f005_65528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the largest particle in the image?" }, { "from": "gpt", "value": "The answer is Approximately 5 µm" } ], "clip_score": 22.234375, "loss": 2.142960548400879 }, { "id": "identity_164212", "image": "UniGeo/images/389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the circle O, if ∠ACB = 50°, what is the measure of ∠APC?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 33.625, "loss": 1.6210782527923584 }, { "id": "identity_97086", "image": "CLEVR-Math/images/CLEVR_train_055443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a metallic texture?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.6875, "loss": 2.5250394344329834 }, { "id": "identity_22629", "image": "IconQA/images/43552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Alan is riding his bike in the afternoon. His watch shows the time. What time is it?\nChoices:\n(A) 4:00 P.M.\n(B) 4:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.890625, "loss": 3.5698299407958984 }, { "id": "identity_107576", "image": "A-OKVQA/images/121457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of boat has a sail?" }, { "from": "gpt", "value": "The answer is The blue and white boat" } ], "clip_score": 27.703125, "loss": 3.503565549850464 }, { "id": "identity_105272", "image": "A-OKVQA/images/234929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the plane's body?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 18.640625, "loss": 4.1855926513671875 }, { "id": "identity_226828", "image": "PlotQA/images/155596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage change in the price level ratio from 2009 to 2010, and what is the average price level ratio for the years 2008, 2009, 2010, and 2011?" }, { "from": "gpt", "value": "The answer is Percentage change is 16%, and average price level ratio is 0.28" } ], "clip_score": 28.421875, "loss": 2.033864736557007 }, { "id": "identity_199812", "image": "PMC-VQA/images/PMC4533796_Fig6_413971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of pathology in the tissue shown?" }, { "from": "gpt", "value": "The answer is Possible, indicated by the dashed box" } ], "clip_score": 30.109375, "loss": 4.992727279663086 }, { "id": "identity_85374", "image": "GeoQA+/images/1732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在⊙O中,若∠BOC=120°,则弧BC所对的圆周角的度数为()" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 33.375, "loss": 1.6681160926818848 }, { "id": "identity_130719", "image": "VQA2.0/images/24411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What condiment is visible on the table?" }, { "from": "gpt", "value": "The answer is Red pepper flakes" } ], "clip_score": 16.609375, "loss": 1.7945703268051147 }, { "id": "identity_208074", "image": "PMC-VQA/images/PMC5135102_pone.0167517.g003_101955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What cellular organelle is indicated by the label \"n\" in the image?" }, { "from": "gpt", "value": "The answer is Nucleus" } ], "clip_score": 26.265625, "loss": 1.4226990938186646 }, { "id": "identity_321474", "image": "DVQA/images/bar_train_00146694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the royalblue color?" }, { "from": "gpt", "value": "The answer is score" } ], "clip_score": 23.203125, "loss": 3.6386656761169434 }, { "id": "identity_333485", "image": "TabMWP/images/16217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kids painted at least 3 flower pots last week?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 36.53125, "loss": 2.3113138675689697 }, { "id": "identity_40821", "image": "DVQA/images/bar_train_00022114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of 'flood' for 'hair'?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 23.984375, "loss": 3.1255781650543213 }, { "id": "identity_219328", "image": "VQA2.0/images/278043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in this image?" }, { "from": "gpt", "value": "The answer is Overcast" } ], "clip_score": 18.71875, "loss": 2.3259084224700928 }, { "id": "identity_208963", "image": "VQA2.0/images/331165.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the photographer?" }, { "from": "gpt", "value": "The answer is Jordi Verdugo" } ], "clip_score": 18.96875, "loss": 3.4569060802459717 }, { "id": "identity_303483", "image": "TabMWP/images/26597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each shelf has 10 pairs of pants. How many pairs of pants are on some shelves?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 38.21875, "loss": 2.106518268585205 }, { "id": "identity_298566", "image": "PMC-VQA/images/PMC4822243_Fig4_18737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of image displays hyperenhancement at the border zone but not the core of an ablated lesion following FUS treatment?\nChoices:\n(A) T2-FSE image\n(B) DCE T1-FSE image\n(C) T1-FSE image\n(D) DWI image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.515625, "loss": 4.342889308929443 }, { "id": "identity_301205", "image": "Geometry3K/images/813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine L P.\nChoices:\n(A) 7\n(B) 10\n(C) 12\n(D) 15" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.0625, "loss": 2.6253061294555664 }, { "id": "identity_252477", "image": "A-OKVQA/images/263359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the terrain and surroundings, what type of ecosystem are these horseback riders most likely riding through, and what are two key characteristics of this ecosystem?" }, { "from": "gpt", "value": "The answer is Riparian ecosystem; proximity to water, moist-adapted vegetation" } ], "clip_score": 27.515625, "loss": 3.032902956008911 }, { "id": "identity_177022", "image": "TabMWP/images/19186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the inn host the least number of guests?" }, { "from": "gpt", "value": "The answer is Sunday and Wednesday" } ], "clip_score": 36.5625, "loss": 2.4765501022338867 }, { "id": "identity_92740", "image": "TabMWP/images/30204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of cars sold per month?" }, { "from": "gpt", "value": "The answer is 83.375" } ], "clip_score": 26.609375, "loss": 2.336737871170044 }, { "id": "identity_181138", "image": "IconQA/images/47704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are both blue and circles?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.1875, "loss": 2.3521015644073486 }, { "id": "identity_119575", "image": "VQA-AS/images/6034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main subject of the painting on the wall?" }, { "from": "gpt", "value": "The answer is A landscape" } ], "clip_score": 22.734375, "loss": 3.996828079223633 }, { "id": "identity_204363", "image": "A-OKVQA/images/420236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the stripes on the truck?" }, { "from": "gpt", "value": "The answer is Red and blue" } ], "clip_score": 23.75, "loss": 2.902883768081665 }, { "id": "identity_199946", "image": "PMC-VQA/images/PMC4801854_F2_13423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete circular structures can be observed in the image?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 17.84375, "loss": 2.3636841773986816 }, { "id": "identity_297092", "image": "CLEVR-Math/images/CLEVR_train_008853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After removing all the brown cylinders, how many purple blocks remain?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.890625, "loss": 2.3930552005767822 }, { "id": "identity_96575", "image": "IconQA/images/49986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one person needs one sleeping bag and one pillow, how many people can be accommodated with the items shown?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 31.65625, "loss": 2.212717294692993 }, { "id": "identity_145786", "image": "DocVQA/images/hpbl0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is the letter addressed?" }, { "from": "gpt", "value": "The answer is Mr. Don McKinley" } ], "clip_score": 26.28125, "loss": 2.928786277770996 }, { "id": "identity_307888", "image": "FigureQA/images/45931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.8125, "loss": 4.1029863357543945 }, { "id": "identity_38548", "image": "TextVQA/images/fe858788a06ba30b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what´s the website on this picture?" }, { "from": "gpt", "value": "The answer is trojantalk.com" } ], "clip_score": 19.078125, "loss": 1.4084358215332031 }, { "id": "identity_81521", "image": "Geometry3K/images/1466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB = 60 and DE = 48, find the measure of angle AEB" }, { "from": "gpt", "value": "The answer is 126 degrees" } ], "clip_score": 32.65625, "loss": 1.9646496772766113 }, { "id": "identity_132418", "image": "VQA2.0/images/49640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the sign on the traffic light pole say?" }, { "from": "gpt", "value": "The answer is DO NOT BLOCK" } ], "clip_score": 17.625, "loss": 3.2775044441223145 }, { "id": "identity_252791", "image": "PMC-VQA/images/PMC6712714_Fig2_29638.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surgical hardware is used to stabilize the fracture, and what is its purpose?" }, { "from": "gpt", "value": "The answer is Metal plate with screws for stabilization and alignment" } ], "clip_score": 32.53125, "loss": 2.873408079147339 }, { "id": "identity_22512", "image": "GeoQA+/images/6011.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,C、D在圆上,且∠BAC=28°,则∠ADC=()\nChoices:\n(A) 52°\n(B) 56°\n(C) 62°\n(D) 72°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.109375, "loss": 2.3841075897216797 }, { "id": "identity_299306", "image": "A-OKVQA/images/365271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following advertisers is a watch company?\n(A) Armitron\n(B) Bud\n(C) Hershey's\n(D) Kroger" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.4296875, "loss": 3.868753433227539 }, { "id": "identity_214683", "image": "TextVQA/images/3742f95f789e4f47.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the action being performed by the player with the bat?" }, { "from": "gpt", "value": "The answer is Batting" } ], "clip_score": 23.640625, "loss": 3.2182884216308594 }, { "id": "identity_197145", "image": "TextVQA/images/3ba7d206327f9630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What equipment related to the sport is lying on the ground?" }, { "from": "gpt", "value": "The answer is A bat" } ], "clip_score": 17.84375, "loss": 3.4724252223968506 }, { "id": "identity_38591", "image": "IconQA/images/63354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many squares are there?\nChoices:\n(A) 10\n(B) 4\n(C) 7\n(D) 9\n(E) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.59375, "loss": 3.0091404914855957 }, { "id": "identity_41035", "image": "ChartQA/images/two_col_4985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average annual gross domestic product growth for the years 2016 and 2017?" }, { "from": "gpt", "value": "The answer is 2.44%" } ], "clip_score": 24.78125, "loss": 1.5621501207351685 }, { "id": "identity_258287", "image": "VizWiz/images/VizWiz_train_00013440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are in the image, and what are they doing?" }, { "from": "gpt", "value": "The answer is Two birds, eating from a human hand" } ], "clip_score": 29.96875, "loss": 2.9016568660736084 }, { "id": "identity_106782", "image": "PMC-VQA/images/PMC5414954_pone.0173565.g001_159883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the age of the individual whose MRI scan is shown in the image?" }, { "from": "gpt", "value": "The answer is 35 years old" } ], "clip_score": 32.0, "loss": 1.7653999328613281 }, { "id": "identity_155007", "image": "ScienceQA/images/8345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the item the horse is wearing?" }, { "from": "gpt", "value": "The answer is Bridle" } ], "clip_score": 28.171875, "loss": 2.1352193355560303 }, { "id": "identity_263538", "image": "VizWiz/images/VizWiz_train_00015479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is pliers" } ], "clip_score": 30.65625, "loss": 3.1254091262817383 }, { "id": "identity_270648", "image": "FigureQA/images/73184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.640625, "loss": 4.559772491455078 }, { "id": "identity_97191", "image": "IconQA/images/55703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the frame is filled with shapes?" }, { "from": "gpt", "value": "The answer is 3/10" } ], "clip_score": 32.5625, "loss": 1.6776431798934937 }, { "id": "identity_285675", "image": "DocVQA/images/nrmg0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the table?" }, { "from": "gpt", "value": "The answer is room schedule" } ], "clip_score": 22.078125, "loss": 3.5342016220092773 }, { "id": "identity_192502", "image": "VizWiz/images/VizWiz_train_00007008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the radio's body?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.328125, "loss": 3.4553749561309814 }, { "id": "identity_135478", "image": "DocVQA/images/nkcg0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cost differential per carton for Winston, Camel, and Doral cigarettes?" }, { "from": "gpt", "value": "The answer is $1.50" } ], "clip_score": 16.734375, "loss": 1.7970820665359497 }, { "id": "identity_270016", "image": "GeoQA+/images/3317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, D and E are the midpoints of sides AB and AC of triangle ABC, point F lies on DE, and ∠AFB = 90°. If AB = 5 and BC = 8, what is the length of EF?\nChoices:\n(A) 1\n(B) 1.5\n(C) 2\n(D) 0.4" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.3125, "loss": 2.185641050338745 }, { "id": "identity_229639", "image": "GeoQA+/images/4366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABCD内接于⊙O,若∠BCD=110°,求∠BAD的度数。" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 32.46875, "loss": 1.5477211475372314 }, { "id": "identity_289707", "image": "A-OKVQA/images/167456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag is displayed on the boats? \nOptions:\n(A) Sweden\n(B) Finland\n(C) Iceland\n(D) Norway" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.65625, "loss": 3.5992493629455566 }, { "id": "identity_230795", "image": "DocVQA/images/qrdw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of different contact methods provided for Stephen M. Parker, and list them?" }, { "from": "gpt", "value": "The answer is Three contact methods: phone, fax, and email" } ], "clip_score": 22.859375, "loss": 2.3589837551116943 }, { "id": "identity_66518", "image": "ScienceQA/images/19245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the South pole on both magnets?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 21.046875, "loss": 3.0584964752197266 }, { "id": "identity_262268", "image": "AI2D/images/1217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where do the biochemical processes of respiration and energy production take place within the cell?\nChoices:\n(A) K\n(B) C\n(C) P\n(D) H" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.515625, "loss": 3.607710599899292 }, { "id": "identity_200461", "image": "VizWiz/images/VizWiz_train_00014842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the toaster?" }, { "from": "gpt", "value": "The answer is Cuisinart" } ], "clip_score": 20.453125, "loss": 2.334704875946045 }, { "id": "identity_189491", "image": "TextVQA/images/5f0c8fe763efeb62.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing on the stage?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.890625, "loss": 4.3330488204956055 }, { "id": "identity_267218", "image": "FigureQA/images/84841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.671875, "loss": 3.981447696685791 }, { "id": "identity_319658", "image": "VQA2.0/images/22536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the hair short?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.5, "loss": 4.337288856506348 }, { "id": "identity_99762", "image": "CLEVR-Math/images/CLEVR_train_017607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the small brown cube?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.75, "loss": 2.7184436321258545 }, { "id": "identity_84633", "image": "UniGeo/images/1520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the area of triangle ADE to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 37.90625, "loss": 2.9206759929656982 }, { "id": "identity_125521", "image": "VQA-RAD/images/synpic27277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a fracture in the visible bones?" }, { "from": "gpt", "value": "The answer is No evidence of fracture detected" } ], "clip_score": 23.71875, "loss": 3.516185998916626 }, { "id": "identity_330398", "image": "PMC-VQA/images/PMC8811295_F1_183380.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the cerebral microbleeds located in the SWI image?\nChoices:\n(A) Right basal ganglia\n(B) Left basal ganglia\n(C) Cerebellum\n(D) Frontal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.328125, "loss": 4.27006196975708 }, { "id": "identity_98199", "image": "TabMWP/images/22406.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a senior ticket cost than a child ticket at the Cleveland Zoo?" }, { "from": "gpt", "value": "The answer is $3" } ], "clip_score": 37.59375, "loss": 2.2118608951568604 }, { "id": "identity_161251", "image": "UniGeo/images/3240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if AB = 3, BC = 5, and BE bisects angle ABC, what is the measure of angle ABE?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 31.96875, "loss": 2.07007098197937 }, { "id": "identity_228823", "image": "FigureQA/images/34367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Rank the colors from the largest to the smallest percentage in the pie chart" }, { "from": "gpt", "value": "The answer is Periwinkle, Dark Gold, Brown" } ], "clip_score": 43.84375, "loss": 1.359783411026001 }, { "id": "identity_178362", "image": "IconQA/images/37148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pencil sharpener is 2 inches long, how long is the whiteboard marker in inches?" }, { "from": "gpt", "value": "The answer is 6 inches" } ], "clip_score": 25.546875, "loss": 2.066904067993164 }, { "id": "identity_37763", "image": "A-OKVQA/images/172874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the standing man doing with his arms?\nChoices:\n(A) lifting\n(B) hiding\n(C) folding\n(D) waving" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.9453125, "loss": 4.4855217933654785 }, { "id": "identity_113597", "image": "PMC-VQA/images/PMC7899876_F1_372376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any sign of retinal detachment in the image?" }, { "from": "gpt", "value": "The answer is No sign of retinal detachment" } ], "clip_score": 32.5625, "loss": 2.621066093444824 }, { "id": "identity_82747", "image": "Geometry3K/images/1430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If triangles LMN and QRS are congruent, find the length of side LN" }, { "from": "gpt", "value": "The answer is 51" } ], "clip_score": 33.90625, "loss": 2.960562229156494 }, { "id": "identity_241856", "image": "VQA2.0/images/474472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many computer screens are turned on and what is displayed on each of them?" }, { "from": "gpt", "value": "The answer is four screens are turned on, displaying text and webpages" } ], "clip_score": 26.765625, "loss": 2.62249493598938 }, { "id": "identity_122190", "image": "VQA-RAD/images/synpic50962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the fourth ventricle present in this section of the MRI scan?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 33.75, "loss": 2.988651752471924 }, { "id": "identity_17877", "image": "TextVQA/images/006d10667d17b924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what type of meeting is it?" }, { "from": "gpt", "value": "The answer is rimini" } ], "clip_score": 25.421875, "loss": 5.382030963897705 }, { "id": "identity_159498", "image": "GeoQA+/images/3880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC in the given circle?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 35.03125, "loss": 2.2173075675964355 }, { "id": "identity_138498", "image": "MapQA/images/map_6472.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the map?" }, { "from": "gpt", "value": "The answer is The Percentage of ideas in the USA" } ], "clip_score": 38.9375, "loss": 1.7999368906021118 }, { "id": "identity_148765", "image": "DocVQA/images/kyfm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many control subjects were there in the study mentioned in Table 4?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 24.21875, "loss": 2.071141481399536 }, { "id": "identity_127598", "image": "TextVQA/images/70cdf6bb108c760a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What information is provided on the white paper attached to the board?" }, { "from": "gpt", "value": "The answer is Artist name, artwork title, year, and description" } ], "clip_score": 20.40625, "loss": 2.477764129638672 }, { "id": "identity_254940", "image": "IconQA/images/44519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Ann's train ride started at 6:30 P.M. and the journey takes 1 hour and 45 minutes, what time will it be when she arrives at her destination?" }, { "from": "gpt", "value": "The answer is 8:15 P.M" } ], "clip_score": 26.078125, "loss": 1.0822296142578125 }, { "id": "identity_56243", "image": "DocVQA/images/phpj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the first protein-containing beverage mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Saci" } ], "clip_score": 17.4375, "loss": 5.126282691955566 }, { "id": "identity_39058", "image": "ChartQA/images/two_col_101562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the graph reach the highest value?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.609375, "loss": 2.716233015060425 }, { "id": "identity_312710", "image": "TextVQA/images/3b4449c39aa9f52b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this?" }, { "from": "gpt", "value": "The answer is toshiba" } ], "clip_score": 26.859375, "loss": 2.800302267074585 }, { "id": "identity_325856", "image": "IconQA/images/96857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Lester is putting away the dishes. The clock shows the time. What time is it?\nChoices:\n(A) 7:00 A.M.\n(B) 7:00 P.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.09375, "loss": 3.7926998138427734 }, { "id": "identity_176997", "image": "TabMWP/images/35339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more straight-A students were there in 2011 compared to 2012?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.09375, "loss": 2.3462612628936768 }, { "id": "identity_98374", "image": "IconQA/images/59674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock were to lose 5 minutes, what time would it show?" }, { "from": "gpt", "value": "The answer is 4:10" } ], "clip_score": 26.984375, "loss": 1.369179606437683 }, { "id": "identity_136947", "image": "DocVQA/images/fmmg0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of Section B?" }, { "from": "gpt", "value": "The answer is Microchemical Assays of the Liver Lobule in Cold-acclimatized Rats" } ], "clip_score": 18.453125, "loss": 2.9113574028015137 }, { "id": "identity_187273", "image": "PMC-VQA/images/PMC4814887_phy212730-fig-0002_16870.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of graph is used to represent the data?" }, { "from": "gpt", "value": "The answer is Bar graph" } ], "clip_score": 12.65625, "loss": 2.738229751586914 }, { "id": "identity_61937", "image": "DocVQA/images/smyl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the letter?" }, { "from": "gpt", "value": "The answer is November 20, 1953" } ], "clip_score": 22.71875, "loss": 1.6262257099151611 }, { "id": "identity_139135", "image": "ChartQA/images/2558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the percentages for the major reason \"Partner is not ready to make that kind of commitment\" and the minor reason \"They are not far enough along in their job or career\"?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 26.375, "loss": 2.0822179317474365 }, { "id": "identity_162424", "image": "GeoQA+/images/905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle O, if AE is parallel to BC and ∠ECD = 50°, what is the measure of ∠DEC?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 35.90625, "loss": 1.6982077360153198 }, { "id": "identity_95101", "image": "TabMWP/images/16010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of states visited by the students?" }, { "from": "gpt", "value": "The answer is 33.875" } ], "clip_score": 26.125, "loss": 2.297400951385498 }, { "id": "identity_55704", "image": "DocVQA/images/glch0224_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the payee percentage for the first lien holder?" }, { "from": "gpt", "value": "The answer is 100%" } ], "clip_score": 20.25, "loss": 1.7566816806793213 }, { "id": "identity_124863", "image": "PMC-VQA/images/PMC5409948_F2_158094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique is shown in the image?" }, { "from": "gpt", "value": "The answer is X-ray angiogram" } ], "clip_score": 31.3125, "loss": 2.483288526535034 }, { "id": "identity_240915", "image": "MapQA/images/map_2336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage range of panels for the states colored in dark blue?" }, { "from": "gpt", "value": "The answer is 48.0%-53.8%" } ], "clip_score": 33.65625, "loss": 1.6556042432785034 }, { "id": "identity_273469", "image": "ChartQA/images/10507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is represented by the green line in the data?" }, { "from": "gpt", "value": "The answer is Obama" } ], "clip_score": 20.578125, "loss": 2.7690975666046143 }, { "id": "identity_27962", "image": "CLEVR-Math/images/CLEVR_train_035778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all blue metallic cylinders. How many cylinders are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.421875, "loss": 2.2691843509674072 }, { "id": "identity_214089", "image": "VQA2.0/images/562393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs of the zebras are visible in the image?" }, { "from": "gpt", "value": "The answer is Eight" } ], "clip_score": 30.484375, "loss": 3.2885358333587646 }, { "id": "identity_291356", "image": "UniGeo/images/479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB is a chord of circle O with a length of 10. Point C is a moving point on circle O, and angle C is 45°. If points M and N are the midpoints of chords AB and AC respectively, what is the maximum value of MN?\nChoices:\n(A) 5√{2}\n(B) 5\n(C) √{5}\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.96875, "loss": 2.2642924785614014 }, { "id": "identity_137381", "image": "ChartQA/images/multi_col_100579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value for Bond funds over the period shown?" }, { "from": "gpt", "value": "The answer is 4.7%" } ], "clip_score": 27.0, "loss": 1.760204553604126 }, { "id": "identity_278563", "image": "VQA-AS/images/2972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the girl currently skipping rope?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 30.40625, "loss": 3.682208299636841 }, { "id": "identity_90814", "image": "UniGeo/images/2001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the base AB in the diagram?" }, { "from": "gpt", "value": "The answer is AB = 40.96 cm" } ], "clip_score": 31.390625, "loss": 2.669915199279785 }, { "id": "identity_139474", "image": "DVQA/images/bar_train_00138942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest average accuracy across both datasets?" }, { "from": "gpt", "value": "The answer is autumn" } ], "clip_score": 40.1875, "loss": 2.6576099395751953 }, { "id": "identity_100823", "image": "TabMWP/images/28320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of games won between the highest and lowest winning years?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 30.03125, "loss": 2.826716423034668 }, { "id": "identity_274842", "image": "A-OKVQA/images/491857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are the boaters planning to engage in at their destination?\nChoices:\n(A) sight see\n(B) shop\n(C) party\n(D) swim" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.890625, "loss": 4.343110084533691 }, { "id": "identity_130047", "image": "A-OKVQA/images/561919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the umbrellas?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.71875, "loss": 3.7495574951171875 }, { "id": "identity_203502", "image": "PMC-VQA/images/PMC8732915_Fig2_156809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this PET scan image showing the male or female pelvis?" }, { "from": "gpt", "value": "The answer is Female pelvis" } ], "clip_score": 31.90625, "loss": 2.1791396141052246 }, { "id": "identity_129597", "image": "PMC-VQA/images/PMC4101714_F2_306698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the lowest intensity on the scale?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.984375, "loss": 3.5139458179473877 }, { "id": "identity_331569", "image": "TQA/images/question_images/human_system_circulatory_3637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many kinds of artery are depicted in the diagram?\nChoices:\n(A) 2\n(B) 4\n(C) 1\n(D) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.328125, "loss": 3.4499576091766357 }, { "id": "identity_60872", "image": "DocVQA/images/kkhf0227_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What will the sub-panel define regarding potash and phosphate?" }, { "from": "gpt", "value": "The answer is What, where, when needed, and capital required" } ], "clip_score": 18.203125, "loss": 4.720923900604248 }, { "id": "identity_60349", "image": "DocVQA/images/yynk0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total premium amount payable for this policy?" }, { "from": "gpt", "value": "The answer is $22,000,000" } ], "clip_score": 26.375, "loss": 1.5199477672576904 }, { "id": "identity_334377", "image": "IconQA/images/30475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a crab. What is the eighth image?\nChoices:\n(A) boat\n(B) crab\n(C) bucket" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.3125, "loss": 3.7641117572784424 }, { "id": "identity_313098", "image": "IconQA/images/35368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a plane. Which image is ninth?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.03125, "loss": 4.2154927253723145 }, { "id": "identity_120816", "image": "TextVQA/images/00ac03de349a3c5b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fruit in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 26.8125, "loss": 2.569035768508911 }, { "id": "identity_162350", "image": "GeoQA+/images/3412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the triangle is moved along the side of the rectangle so that the right angle vertex touches the opposite corner of the rectangle, what will be the measure of angle 4, the angle between the hypotenuse of the triangle and the bottom side of the rectangle?" }, { "from": "gpt", "value": "The answer is 50 degrees" } ], "clip_score": 31.828125, "loss": 2.255460739135742 }, { "id": "identity_320345", "image": "ScienceQA/images/3529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this canoe made of?\nChoices:\n(A) clay\n(B) wood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.109375, "loss": 3.993957996368408 }, { "id": "identity_11572", "image": "DocVQA/images/lzhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the date mentioned ?" }, { "from": "gpt", "value": "The answer is May 4, 1956" } ], "clip_score": 16.90625, "loss": 2.0728049278259277 }, { "id": "identity_265694", "image": "PMC-VQA/images/PMC9251490_F1_330210.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging view was utilized to assess the right ventricular Tei index?\nChoices:\n(A) Apical 2-chamber view\n(B) Parasternal long-axis view\n(C) Apical 4-chamber view\n(D) Subcostal view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.78125, "loss": 4.356764793395996 }, { "id": "identity_256187", "image": "DocVQA/images/mmfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who wrote the letter to Dr. Robert Shank, and what is their position at Duke University Medical Center?" }, { "from": "gpt", "value": "The answer is Dorothy Tate, Director of Dietetics" } ], "clip_score": 28.640625, "loss": 3.001899003982544 }, { "id": "identity_127944", "image": "VizWiz/images/VizWiz_train_00011042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the box of chocolate thins?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 27.203125, "loss": 3.350803852081299 }, { "id": "identity_247003", "image": "TextVQA/images/22c54eec31ca4ab6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the shop and what type of products does it primarily sell?" }, { "from": "gpt", "value": "The answer is Flesor's; primarily sells candies and sweets" } ], "clip_score": 26.515625, "loss": 2.430255174636841 }, { "id": "identity_291402", "image": "A-OKVQA/images/9946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the lack of clarity in the man's image that prevents us from seeing him clearly as we should?\nChoices:\n(A) distortion effect\n(B) blur\n(C) watermark\n(D) picture overexposed" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.0546875, "loss": 4.792606353759766 }, { "id": "identity_90838", "image": "GEOS/images/059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the square ABCD?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 35.90625, "loss": 1.7037878036499023 }, { "id": "identity_42838", "image": "DVQA/images/bar_train_00106274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of people who prefer the object fun between the categories mother and mind?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.390625, "loss": 2.2941906452178955 }, { "id": "identity_13024", "image": "DocVQA/images/npjh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: which school did Miss Emma Cadwalader go" }, { "from": "gpt", "value": "The answer is Shipley School" } ], "clip_score": 19.078125, "loss": 2.1434783935546875 }, { "id": "identity_287923", "image": "PMC-VQA/images/PMC7072145_cancers-12-00371-f004_134014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of image displayed in A from the following options:\n(A) Axial CT slice\n(B) Maximum intensity projection (MIP) PET image\n(C) Axial PET slice\n(D) Axial PET/CT fused slice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.203125, "loss": 3.8781020641326904 }, { "id": "identity_259140", "image": "TQA/images/question_images/waves_interactions_interference_7682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of waves in all the diagrams combined?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 31.71875, "loss": 1.9869319200515747 }, { "id": "identity_77599", "image": "AI2D/images/4413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of leaf has a tip that extends beyond the leaf margin forming a tail-like extension?" }, { "from": "gpt", "value": "The answer is Acuminate" } ], "clip_score": 32.96875, "loss": 2.654890298843384 }, { "id": "identity_222616", "image": "PMC-VQA/images/PMC6334729_Fig2_423620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What lobe of the lung is the lesion located in?" }, { "from": "gpt", "value": "The answer is Right lower lobe" } ], "clip_score": 31.296875, "loss": 2.6319613456726074 }, { "id": "identity_99617", "image": "CLEVR-Math/images/CLEVR_train_062661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 23.5625, "loss": 1.887729525566101 }, { "id": "identity_226824", "image": "DVQA/images/bar_train_00059642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the items 'room' and 'gate' in the store 'post'?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 29.328125, "loss": 2.610703706741333 }, { "id": "identity_260409", "image": "CLEVR-Math/images/CLEVR_train_055041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 5 yellow metallic balls to the existing yellow metallic balls, and then remove 2 yellow metallic balls, how many yellow metallic balls will remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.453125, "loss": 2.8916847705841064 }, { "id": "identity_184680", "image": "PMC-VQA/images/PMC7442462_F2_238763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a mass effect or midline shift in this brain MRI?" }, { "from": "gpt", "value": "The answer is No evidence of mass effect or midline shift" } ], "clip_score": 34.59375, "loss": 2.0775301456451416 }, { "id": "identity_191717", "image": "PMC-VQA/images/PMC6883550_Fig2_75821.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the white arrow in the image?" }, { "from": "gpt", "value": "The answer is Indicates area of interest or importance" } ], "clip_score": 17.265625, "loss": 3.3766002655029297 }, { "id": "identity_126424", "image": "VQA2.0/images/406205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather condition is depicted in the image?" }, { "from": "gpt", "value": "The answer is Snowy" } ], "clip_score": 25.6875, "loss": 3.3281471729278564 }, { "id": "identity_84374", "image": "GeoQA+/images/2994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with AB = AC = 3, BC = 2, and arcs intersecting at M and N, what is the ratio of the area of triangle AMN to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 4sqrt(2)" } ], "clip_score": 33.71875, "loss": 2.528430223464966 }, { "id": "identity_187297", "image": "TextVQA/images/1bb1b5385b2beac4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the runner's shoes?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 16.59375, "loss": 3.2593605518341064 }, { "id": "identity_311313", "image": "MapQA/images/map_9852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value in Alaska the highest in this region?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.296875, "loss": 3.0304157733917236 }, { "id": "identity_49660", "image": "FigureQA/images/34534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of the Midnight Blue section in the pie chart?" }, { "from": "gpt", "value": "The answer is 12.5%" } ], "clip_score": 38.1875, "loss": 1.3195559978485107 }, { "id": "identity_174680", "image": "IconQA/images/60988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the hour and minute hands?" }, { "from": "gpt", "value": "The answer is 0 degrees" } ], "clip_score": 31.140625, "loss": 2.3736748695373535 }, { "id": "identity_37450", "image": "TabMWP/images/3801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Harold went to the store and bought 3 pounds of hemp flour and 3 pounds of unbleached white flour. How much did he spend?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 30.796875, "loss": 3.0887038707733154 }, { "id": "identity_147238", "image": "FigureQA/images/35555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the color Medium Seafoam?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 22.25, "loss": 2.361107587814331 }, { "id": "identity_273361", "image": "TabMWP/images/4045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In a trivia game played by friends, what is the range of the recorded scores?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 25.046875, "loss": 3.936573028564453 }, { "id": "identity_244339", "image": "DocVQA/images/kkhf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many publications on Food Balances are enclosed, and what is the total number of countries covered by these publications?" }, { "from": "gpt", "value": "The answer is 5 publications, 90 countries" } ], "clip_score": 17.0625, "loss": 2.4428446292877197 }, { "id": "identity_254986", "image": "ScienceQA/images/2215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of rock is shale, and what are its primary components?" }, { "from": "gpt", "value": "The answer is Sedimentary rock, primarily clay minerals and quartz" } ], "clip_score": 27.640625, "loss": 2.1556365489959717 }, { "id": "identity_230657", "image": "DocVQA/images/phwg0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total of the RJR account no and the RJR territory no given in the form?" }, { "from": "gpt", "value": "The answer is 899218" } ], "clip_score": 21.453125, "loss": 2.554518938064575 }, { "id": "identity_257178", "image": "A-OKVQA/images/545022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the guy wearing a black hat doing while the horses are pulling the carriage?" }, { "from": "gpt", "value": "The answer is talking while the horses are pulling the carriage" } ], "clip_score": 28.359375, "loss": 2.3514561653137207 }, { "id": "identity_289600", "image": "VQA2.0/images/279334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For what purpose is the rotten wood shown in this image?" }, { "from": "gpt", "value": "The answer is fence" } ], "clip_score": 22.28125, "loss": 2.969329833984375 }, { "id": "identity_78761", "image": "TQA/images/question_images/periodic_table_7358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the Transition Metals on this periodic table?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 33.53125, "loss": 3.2019214630126953 }, { "id": "identity_31212", "image": "VQA2.0/images/37169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of weather it is?" }, { "from": "gpt", "value": "The answer is nice" } ], "clip_score": 13.5859375, "loss": 3.7620673179626465 }, { "id": "identity_190769", "image": "VQA2.0/images/252407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are paddling in the dragon boat?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 34.375, "loss": 1.7049833536148071 }, { "id": "identity_281705", "image": "GeoQA+/images/1892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and C are on circle O. If ∠BOC is 50°, what is the degree measure of ∠A?\nChoices:\n(A) 25°\n(B) 20°\n(C) 80°\n(D) 100°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.84375, "loss": 2.3712687492370605 }, { "id": "identity_39536", "image": "FigureQA/images/64404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are there for Slate?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 26.5, "loss": 2.401115894317627 }, { "id": "identity_174567", "image": "TabMWP/images/27968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Lionel's total earnings went to other taxes?" }, { "from": "gpt", "value": "The answer is 7.65%" } ], "clip_score": 29.921875, "loss": 1.560361623764038 }, { "id": "identity_282426", "image": "PMC-VQA/images/PMC6899010_DMM042200F1_80328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What substance is being visualized in the brain?\nChoices:\n(A) Dopamine\n(B) Serotonin\n(C) Tyrosine hydroxylase\n(D) Glutamate" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.875, "loss": 4.008432865142822 }, { "id": "identity_289919", "image": "PMC-VQA/images/PMC6966383_Fig6_101771.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which medical imaging technique was utilized to evaluate bleeding? \nOptions:\n(A) X-ray \n(B) MRI \n(C) Ultrasound \n(D) CT Scan" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.40625, "loss": 3.894453525543213 }, { "id": "identity_318481", "image": "IconQA/images/99963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use these to measure the line. The line is about (_) of these long." }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 20.25, "loss": 2.541210412979126 }, { "id": "identity_166584", "image": "TabMWP/images/35249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 2 parakeets leave from 1 tree, how many parakeets will remain in that tree?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 27.828125, "loss": 2.16740345954895 }, { "id": "identity_55809", "image": "DocVQA/images/fmlg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direct materials cost under this contract?" }, { "from": "gpt", "value": "The answer is $119.37" } ], "clip_score": 21.53125, "loss": 2.337742805480957 }, { "id": "identity_329388", "image": "PMC-VQA/images/PMC6462967_children-06-00043-f010_465086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the abnormality shown in this ventricle?\nChoices:\n(A) dilatation\n(B) hypertrophy\n(C) hypotrophy\n(D) atrophy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.203125, "loss": 4.522143840789795 }, { "id": "identity_1963", "image": "IconQA/images/59732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers are there?" }, { "from": "gpt", "value": "The answer is 53" } ], "clip_score": 28.78125, "loss": 2.445922613143921 }, { "id": "identity_20691", "image": "Super-CLEVR/images/superCLEVR_new_001249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the tiny yellow bus?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 26.078125, "loss": 3.9564623832702637 }, { "id": "identity_299154", "image": "PlotQA/images/152396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest total debt amount across all years?" }, { "from": "gpt", "value": "The answer is 2386277000" } ], "clip_score": 26.828125, "loss": 2.4178526401519775 }, { "id": "identity_4572", "image": "ScienceQA/images/17671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system true or false?\nThe volume of Mars is more than three times as large as Mercury's.\nChoices:\n(A) false\n(B) true" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.46875, "loss": 3.919969081878662 }, { "id": "identity_156146", "image": "ScienceQA/images/17200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the backrest of the chair attached to the seat at an angle?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.171875, "loss": 3.4615490436553955 }, { "id": "identity_25916", "image": "VQA2.0/images/134849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lady making?" }, { "from": "gpt", "value": "The answer is pizza" } ], "clip_score": 24.65625, "loss": 3.4041223526000977 }, { "id": "identity_97225", "image": "IconQA/images/40236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of green marbles to the total number of marbles?" }, { "from": "gpt", "value": "The answer is 3:4" } ], "clip_score": 33.5, "loss": 1.5295217037200928 }, { "id": "identity_183395", "image": "Super-CLEVR/images/superCLEVR_new_000842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the smallest in the image?" }, { "from": "gpt", "value": "The answer is The scooter" } ], "clip_score": 26.5, "loss": 3.490757465362549 }, { "id": "identity_93529", "image": "IconQA/images/103957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock gains 10 minutes every hour, what will be the time shown after 6 hours?" }, { "from": "gpt", "value": "The answer is 9:00" } ], "clip_score": 29.984375, "loss": 1.7240893840789795 }, { "id": "identity_232792", "image": "PMC-VQA/images/PMC6266293_viruses-10-00632-f003_403750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green endosomes are visible in the image, and what is the total number of endosomes (of all colors) present?" }, { "from": "gpt", "value": "The answer is 4 green endosomes, 4 total endosomes" } ], "clip_score": 31.53125, "loss": 1.6455934047698975 }, { "id": "identity_278577", "image": "VQA-AS/images/16427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the location of the plants?" }, { "from": "gpt", "value": "The answer is floor" } ], "clip_score": 19.0, "loss": 4.242321491241455 }, { "id": "identity_337340", "image": "ScienceQA/images/20083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property is associated with this item?\nChoices:\n(A) hard\n(B) soft" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.1953125, "loss": 4.266955375671387 }, { "id": "identity_247011", "image": "GeoQA+/images/2533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知AB、AD是⊙O的弦,∠B=30°,点C在弦AB上,连接CO并延长交⊙O于点D,∠D=30°,若AB=10,求OC的长度。" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.296875, "loss": 2.278047800064087 }, { "id": "identity_222894", "image": "TextVQA/images/5af917d6bf6979b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price per pound for the petrified wood?" }, { "from": "gpt", "value": "The answer is $5.00" } ], "clip_score": 32.78125, "loss": 1.486265778541565 }, { "id": "identity_337901", "image": "VQA2.0/images/256683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are they waiting?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.890625, "loss": 3.40522837638855 }, { "id": "identity_59291", "image": "ChartQA/images/two_col_1132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries produced more than 50 billion eggs in 2019?" }, { "from": "gpt", "value": "The answer is 6 countries" } ], "clip_score": 21.734375, "loss": 2.531034231185913 }, { "id": "identity_25278", "image": "DocVQA/images/nzxv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who says that they'd like to get the story to the publication by the October?" }, { "from": "gpt", "value": "The answer is Wayne" } ], "clip_score": 21.328125, "loss": 4.869081020355225 }, { "id": "identity_286634", "image": "DVQA/images/bar_train_00030438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all categories, which object is the least preferred by the total number of people?" }, { "from": "gpt", "value": "The answer is event" } ], "clip_score": 30.9375, "loss": 3.412583112716675 }, { "id": "identity_5250", "image": "TabMWP/images/28721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a chocolate cheesecake cost than an almond tart?" }, { "from": "gpt", "value": "The answer is 0.66" } ], "clip_score": 31.40625, "loss": 2.322078227996826 }, { "id": "identity_334373", "image": "PMC-VQA/images/PMC7841286_F1_354972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which eye is depicted in the image?\nChoices:\n(A) Left eye\n(B) Right eye\n(C) Both eyes\n(D) Cannot be determined" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.78125, "loss": 4.210751533508301 }, { "id": "identity_172015", "image": "TabMWP/images/6092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the price of each pumpkin is $3, how much revenue did the pumpkin patch make from Friday to Sunday?" }, { "from": "gpt", "value": "The answer is $52,209" } ], "clip_score": 34.65625, "loss": 2.4131367206573486 }, { "id": "identity_233641", "image": "DocVQA/images/fmmp0227_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the total amounts for J. Enders, J. Goldstein, and M. Jenkins?" }, { "from": "gpt", "value": "The answer is $52,177.52" } ], "clip_score": 24.71875, "loss": 2.1905598640441895 }, { "id": "identity_34580", "image": "VizWiz/images/VizWiz_train_00000535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does my screen say?" }, { "from": "gpt", "value": "The answer is press ctrl alt delete to begin" } ], "clip_score": 27.09375, "loss": 2.0366592407226562 }, { "id": "identity_59766", "image": "DVQA/images/bar_train_00153111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of people preferring the object sleep the same as the number of people preferring the object whole in the category tool?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.65625, "loss": 3.0922176837921143 }, { "id": "identity_156697", "image": "AI2D/images/3211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the plant is involved in the process of photosynthesis?" }, { "from": "gpt", "value": "The answer is Leaf" } ], "clip_score": 24.671875, "loss": 2.514613389968872 }, { "id": "identity_216335", "image": "VizWiz/images/VizWiz_train_00008699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the pants the person is wearing?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.359375, "loss": 3.5552711486816406 }, { "id": "identity_1929", "image": "ScienceQA/images/6879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Hawaii?\nChoices:\n(A) Baltimore\n(B) Hilo\n(C) Tallahassee\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.09375, "loss": 3.935683488845825 }, { "id": "identity_88782", "image": "GEOS/images/030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AC = DC = DB and angle ACD is 48 degrees, what is the area of triangle ACD?" }, { "from": "gpt", "value": "The answer is 50 square units" } ], "clip_score": 34.40625, "loss": 2.1818389892578125 }, { "id": "identity_123648", "image": "PMC-VQA/images/PMC6525736_Fig3_482783.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is labeled in the brain image?" }, { "from": "gpt", "value": "The answer is Inferior Fronto-Occipital Fasciculus" } ], "clip_score": 27.6875, "loss": 1.0777584314346313 }, { "id": "identity_326344", "image": "VQA2.0/images/544601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this table made of?" }, { "from": "gpt", "value": "The answer is wood" } ], "clip_score": 14.984375, "loss": 3.3957419395446777 }, { "id": "identity_214056", "image": "PMC-VQA/images/PMC8122927_jcm-10-01834-f002_441031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique appears to have been used in this image?" }, { "from": "gpt", "value": "The answer is Immunohistochemistry (IHC)" } ], "clip_score": 29.671875, "loss": 1.4458680152893066 }, { "id": "identity_155906", "image": "AI2D/images/3849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the \"Spongy Layer\" in the leaf structure?" }, { "from": "gpt", "value": "The answer is Contains air spaces for gas exchange" } ], "clip_score": 26.4375, "loss": 2.9228341579437256 }, { "id": "identity_285112", "image": "ScienceQA/images/5621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlight among the following options: the Dominican Republic, Saint Kitts and Nevis, Dominica, or Haiti?\nChoices:\n(A) the Dominican Republic\n(B) Saint Kitts and Nevis\n(C) Dominica\n(D) Haiti" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.703125, "loss": 4.080894470214844 }, { "id": "identity_28567", "image": "DVQA/images/bar_train_00131153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.515625, "loss": 3.435166835784912 }, { "id": "identity_77827", "image": "TQA/images/question_images/cycle_water_1494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is represented by the arrows pointing from the sea to the cloud?" }, { "from": "gpt", "value": "The answer is Evaporation" } ], "clip_score": 31.015625, "loss": 2.476785182952881 }, { "id": "identity_165278", "image": "UniGeo/images/2963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that line m is parallel to line n and angle 1 is 20 degrees, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 29.84375, "loss": 1.8839596509933472 }, { "id": "identity_115499", "image": "VQA2.0/images/151307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the spire of the clock tower?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 28.890625, "loss": 3.577777147293091 }, { "id": "identity_2952", "image": "TQA/images/abc_question_images/parts_worm_17313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the oviduct?\nChoices:\n(A) T\n(B) U\n(C) H\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.03125, "loss": 4.561474800109863 }, { "id": "identity_152421", "image": "TQA/images/question_images/velocity_time_graphs_8222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which graph is the velocity (V) zero at the end of the observed time period (t)?" }, { "from": "gpt", "value": "The answer is The third graph (Constant deceleration)" } ], "clip_score": 28.90625, "loss": 1.9495623111724854 }, { "id": "identity_3898", "image": "TQA/images/question_images/ocean_waves_7119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What will happen to the water level if the height of the wave increases?\nChoices:\n(A) it will decrease\n(B) it will decrease then increase\n(C) it will remain constant\n(D) it will also increase" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.390625, "loss": 3.088618755340576 }, { "id": "identity_146909", "image": "DocVQA/images/ysbd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much is the mileage allowance for travel by private auto?" }, { "from": "gpt", "value": "The answer is Nine cents per mile" } ], "clip_score": 13.6640625, "loss": 2.488330841064453 }, { "id": "identity_144595", "image": "FigureQA/images/6705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Web Purple greater than 40?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.625, "loss": 3.5566372871398926 }, { "id": "identity_68369", "image": "ScienceQA/images/4591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean surrounds the highlighted country on the north and east sides?" }, { "from": "gpt", "value": "The answer is Pacific Ocean" } ], "clip_score": 31.828125, "loss": 2.465709686279297 }, { "id": "identity_230425", "image": "IconQA/images/49113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of empty squares in the frame?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 33.21875, "loss": 2.287020444869995 }, { "id": "identity_178254", "image": "TabMWP/images/18471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more expensive is card stock scrap compared to parchment scrap per pound?" }, { "from": "gpt", "value": "The answer is $2 more expensive" } ], "clip_score": 35.0625, "loss": 2.2661471366882324 }, { "id": "identity_134457", "image": "DVQA/images/bar_train_00100090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the \"beat\" type have a consistent value decrease from \"march\" to \"thick\"?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.296875, "loss": 3.0847511291503906 }, { "id": "identity_208856", "image": "A-OKVQA/images/576650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the statue in the background holding up?" }, { "from": "gpt", "value": "The answer is Right hand" } ], "clip_score": 28.609375, "loss": 3.583157539367676 }, { "id": "identity_305712", "image": "DVQA/images/bar_train_00122481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest total accuracy across all datasets?" }, { "from": "gpt", "value": "The answer is north" } ], "clip_score": 30.28125, "loss": 3.9051613807678223 }, { "id": "identity_198478", "image": "PMC-VQA/images/PMC6124378_f1_363672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any dental pathology be observed in this radiograph?" }, { "from": "gpt", "value": "The answer is Not definitively without further clinical correlation" } ], "clip_score": 34.5625, "loss": 3.944587469100952 }, { "id": "identity_131032", "image": "PMC-VQA/images/PMC8609801_Fig1_117453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the curvature at the lumbar region of the spine as indicated by the yellow arrow?" }, { "from": "gpt", "value": "The answer is Lordosis (inward curvature)" } ], "clip_score": 33.40625, "loss": 2.6930086612701416 }, { "id": "identity_177857", "image": "TabMWP/images/6546.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the grocery store sell the most bananas?" }, { "from": "gpt", "value": "The answer is Sunday" } ], "clip_score": 32.75, "loss": 2.5554325580596924 }, { "id": "identity_116413", "image": "PMC-VQA/images/PMC4686709_fig1_455026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the abnormality indicated by the black arrow?" }, { "from": "gpt", "value": "The answer is Granuloma or small area of infection" } ], "clip_score": 27.484375, "loss": 3.3963496685028076 }, { "id": "identity_301078", "image": "ScienceQA/images/20242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following organisms is a tertiary consumer in this food web?\nChoices:\n(A) pine vole\n(B) beaver\n(C) swallowtail caterpillar\n(D) bobcat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.4375, "loss": 3.5090172290802 }, { "id": "identity_151952", "image": "ScienceQA/images/16953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.328125, "loss": 2.205629587173462 }, { "id": "identity_242747", "image": "Super-CLEVR/images/superCLEVR_new_000832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in the image, and what are their shapes and colors?" }, { "from": "gpt", "value": "The answer is Four objects: blue bicycle, green/yellow double bus, yellow single bus, red/blue airplanes" } ], "clip_score": 35.15625, "loss": 2.9258713722229004 }, { "id": "identity_2397", "image": "GeoQA+/images/3999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如下图所示,已知圆柱的高为8,底面半径为3,若用一个平面沿着上底的直径竖直向下截该圆柱,那么截面的面积为()\nChoices:\n(A) 24\n(B) 48\n(C) 32\n(D) 72" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.90625, "loss": 2.768685817718506 }, { "id": "identity_318673", "image": "Geometry3K/images/1673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine Q S.\nChoices:\n(A) 3\n(B) 4\n(C) 6\n(D) 9" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5, "loss": 2.7149908542633057 }, { "id": "identity_154810", "image": "ScienceQA/images/16594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of the device shown in the image?" }, { "from": "gpt", "value": "The answer is Measuring weather conditions" } ], "clip_score": 22.875, "loss": 3.005265951156616 }, { "id": "identity_189255", "image": "A-OKVQA/images/488747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What mode of transportation is visible in the background?" }, { "from": "gpt", "value": "The answer is Rickshaws" } ], "clip_score": 22.828125, "loss": 3.193504810333252 }, { "id": "identity_19654", "image": "PlotQA/images/102357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph contain any zero values ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.578125, "loss": 3.239798069000244 }, { "id": "identity_245094", "image": "ScienceQA/images/4863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what factor is the volume of Jupiter greater than the combined volume of Mercury, Venus, Earth, and Mars?" }, { "from": "gpt", "value": "The answer is approximately 638" } ], "clip_score": 23.078125, "loss": 2.992220640182495 }, { "id": "identity_226000", "image": "TextVQA/images/e1940594396fbbc8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key design features of this LG phone, and how do they contribute to its functionality?" }, { "from": "gpt", "value": "The answer is Sleek back, rear camera with buttons, bottom ports and speakers" } ], "clip_score": 32.875, "loss": 3.9757490158081055 }, { "id": "identity_35418", "image": "PMC-VQA/images/PMC2946720_F0002_74914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is seen in the left hemithorax? \nChoices:\n(A) Atelectasis \n(B) Pneumothorax \n(C) Bronchiectasis \n(D) Interstitial lung disease " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.90625, "loss": 4.401993751525879 }, { "id": "identity_74591", "image": "ScienceQA/images/6462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the north?" }, { "from": "gpt", "value": "The answer is Utah" } ], "clip_score": 33.125, "loss": 2.940727949142456 }, { "id": "identity_196009", "image": "A-OKVQA/images/416101.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tennis player's shirt?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 26.109375, "loss": 3.2133026123046875 }, { "id": "identity_154811", "image": "ScienceQA/images/16594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which component of the weather station measures wind speed?" }, { "from": "gpt", "value": "The answer is Anemometer" } ], "clip_score": 30.703125, "loss": 2.2912957668304443 }, { "id": "identity_51041", "image": "PlotQA/images/142987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the percentage of electricity produced from renewable sources between Argentina and New Zealand in 1961?" }, { "from": "gpt", "value": "The answer is Approximately 2%" } ], "clip_score": 27.71875, "loss": 2.522246837615967 }, { "id": "identity_229344", "image": "ChartQA/images/OECD_ENROLMENT_RATE_IN_EARLY_CHILDHOOD_EDUCATION_BRA_RUS_000014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average enrollment rate for Brazil from 2010 to 2018?" }, { "from": "gpt", "value": "The answer is 85.89%" } ], "clip_score": 23.1875, "loss": 2.0207512378692627 }, { "id": "identity_10978", "image": "AI2D/images/1070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part represented by D in the diagram?\nChoices:\n(A) none of the above\n(B) nucleus\n(C) lysosome\n(D) golgi bodies" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.625, "loss": 4.346503257751465 }, { "id": "identity_162917", "image": "GeoQA+/images/2617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line a is parallel to line b, and the measure of angle 1 is 100 degrees, what is the measure of angle 2?" }, { "from": "gpt", "value": "The answer is 100 degrees" } ], "clip_score": 32.90625, "loss": 1.4635045528411865 }, { "id": "identity_314459", "image": "TabMWP/images/33167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy a bag of birdseed, a dog collar, and a cat toy?" }, { "from": "gpt", "value": "The answer is 8.44" } ], "clip_score": 36.625, "loss": 2.0300381183624268 }, { "id": "identity_317795", "image": "TabMWP/images/36239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each year, Lancaster holds a citywide relay and records the winning times. Based on the table, what was the rate of change between 2008 and 2009?" }, { "from": "gpt", "value": "The answer is -3" } ], "clip_score": 35.53125, "loss": 3.5640411376953125 }, { "id": "identity_172914", "image": "TabMWP/images/11965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bus trips available from the famous bridge to the university campus throughout the day?" }, { "from": "gpt", "value": "The answer is 3 trips" } ], "clip_score": 29.609375, "loss": 2.131636381149292 }, { "id": "identity_325155", "image": "IconQA/images/98462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to stop on?\nChoices:\n(A) blue\n(B) white" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.96875, "loss": 4.125384330749512 }, { "id": "identity_247116", "image": "PMC-VQA/images/PMC3919902_f1-etm-07-03-0583_265997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify and describe the radiographic features that differentiate fecal material from other possible contents in the right colon, and explain why these features are indicative of fecal material" }, { "from": "gpt", "value": "The answer is Granular or mottled opacities indicative of fecal material" } ], "clip_score": 26.796875, "loss": 2.8610153198242188 }, { "id": "identity_136976", "image": "DVQA/images/bar_train_00034715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the 'client' group have any values below 2?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.375, "loss": 3.26228666305542 }, { "id": "identity_291012", "image": "GeoQA+/images/3878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, quadrilateral ABCD is inscribed in circle O, with point E on the extension of DC. If ∠BCE = 105°, what is the degree measure of ∠BOD?\nChoices:\n(A) 150°\n(B) 105°\n(C) 75°\n(D) 165°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.6875, "loss": 2.326256513595581 }, { "id": "identity_128075", "image": "A-OKVQA/images/350303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the walls in this room?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 17.78125, "loss": 4.1928815841674805 }, { "id": "identity_275532", "image": "PMC-VQA/images/PMC7271094_Fig1_184019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which technique was utilized for reconstruction in the conducted computed tomography angiography? \nOptions:\n(A) Colour Doppler study \n(B) Volume rendering technique \n(C) Pulmonary vein flow \n(D) Computed tomography angiography" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.734375, "loss": 3.971726179122925 }, { "id": "identity_94991", "image": "IconQA/images/88506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is used in a game where landing on pink means winning a prize, what is the chance of not winning a prize on a single spin?" }, { "from": "gpt", "value": "The answer is 7/8" } ], "clip_score": 32.1875, "loss": 2.146711587905884 }, { "id": "identity_278408", "image": "DocVQA/images/ftvn0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the subject." }, { "from": "gpt", "value": "The answer is Voucher Approval" } ], "clip_score": 21.3125, "loss": 3.1819324493408203 }, { "id": "identity_187745", "image": "A-OKVQA/images/383905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many elephants can be seen in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.84375, "loss": 2.2327730655670166 }, { "id": "identity_300289", "image": "TextVQA/images/65e0dd541ab2a106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word before this?" }, { "from": "gpt", "value": "The answer is spaceway" } ], "clip_score": 18.453125, "loss": 3.099970579147339 }, { "id": "identity_193425", "image": "PMC-VQA/images/PMC8573337_F4_105370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Axial (transverse) orientation" } ], "clip_score": 28.484375, "loss": 3.577394962310791 }, { "id": "identity_68061", "image": "TQA/images/abc_question_images/skin_cross_section_17588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label S represent in the skin diagram?" }, { "from": "gpt", "value": "The answer is Sweat gland produces sweat for temperature regulation and excretion" } ], "clip_score": 31.671875, "loss": 2.349579334259033 }, { "id": "identity_206629", "image": "VQA2.0/images/16359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is visible on the back of the security personnel's shirt?" }, { "from": "gpt", "value": "The answer is SECURITY" } ], "clip_score": 9.5, "loss": 2.8764240741729736 }, { "id": "identity_11681", "image": "IconQA/images/44175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Rick is taking his new puppy for an afternoon walk. Rick's watch shows the time. What time is it?\nChoices:\n(A) 2:00 A.M.\n(B) 2:00 P.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.421875, "loss": 3.3888206481933594 }, { "id": "identity_33071", "image": "ScienceQA/images/6192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Virginia\n(B) Maryland\n(C) Connecticut\n(D) North Carolina" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.640625, "loss": 3.746788501739502 }, { "id": "identity_183584", "image": "A-OKVQA/images/141040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of chips can be seen on the table?" }, { "from": "gpt", "value": "The answer is Lay's" } ], "clip_score": 20.5, "loss": 1.5833014249801636 }, { "id": "identity_176400", "image": "TabMWP/images/1696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between North Dakota and Wyoming, which state had a greater increase in population density per square mile from 1990 to 2000?" }, { "from": "gpt", "value": "The answer is Wyoming" } ], "clip_score": 33.65625, "loss": 2.199246883392334 }, { "id": "identity_255860", "image": "GeoQA+/images/1789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线l上有三个正方形a,b,c,若a,c的面积分别为5和11,则b的边长为多少?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.578125, "loss": 2.5230371952056885 }, { "id": "identity_101386", "image": "CLEVR-Math/images/CLEVR_train_051532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.59375, "loss": 2.47839617729187 }, { "id": "identity_299398", "image": "TabMWP/images/18792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that Tommy is currently at Green Avenue and needs to reach Jackson Boulevard by 11:30 A.M., what time should he board the train?\nChoices:\n(A) 10:00 A.M.\n(B) 10:15 A.M.\n(C) 1:15 P.M.\n(D) 10:45 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.734375, "loss": 3.4591705799102783 }, { "id": "identity_205805", "image": "TextVQA/images/df824b1152781395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned on the image?" }, { "from": "gpt", "value": "The answer is 2/09/1992" } ], "clip_score": 12.8984375, "loss": 1.2432364225387573 }, { "id": "identity_92529", "image": "TabMWP/images/9313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of times soccer was played last week?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 35.90625, "loss": 2.566431760787964 }, { "id": "identity_38217", "image": "A-OKVQA/images/90393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trees are shown in the foreground?\nChoices:\n(A) evergreen\n(B) conifers\n(C) christmas\n(D) deciduous" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.35546875, "loss": 4.159767150878906 }, { "id": "identity_320261", "image": "UniGeo/images/2080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,∠B=90°,AB=3,BC=4,点D在BC上,以AC为对角线的所有平行四边形ADCE中,DE的最小值是多少?\n\n选项:\n(A) 2\n(B) 3\n(C) 4\n(D) 3.75" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.578125, "loss": 2.6449036598205566 }, { "id": "identity_285909", "image": "A-OKVQA/images/137108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the target of the person who is swatting?\nChoices:\n(A) fly\n(B) hungry bear\n(C) ant\n(D) tennis ball" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.359375, "loss": 4.572228908538818 }, { "id": "identity_116528", "image": "VQA2.0/images/188329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many surfboards are visible inside the shop?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 34.8125, "loss": 2.043774127960205 }, { "id": "identity_84938", "image": "GeoQA+/images/756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ∠ADC = 30° and OD is the radius of the circle with center O, what is the measure of ∠DOC?" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 35.8125, "loss": 1.462094783782959 }, { "id": "identity_246086", "image": "ChartQA/images/two_col_20814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of COVID-19 deceased patients in Italy as of April 28, 2021, who had 2 or more comorbidities?" }, { "from": "gpt", "value": "The answer is 85.4%" } ], "clip_score": 28.34375, "loss": 1.491747498512268 }, { "id": "identity_1739", "image": "TextVQA/images/5719794943b820c9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what color does the top three clocks have written down?" }, { "from": "gpt", "value": "The answer is 12:34" } ], "clip_score": 26.109375, "loss": 2.3656961917877197 }, { "id": "identity_84443", "image": "UniGeo/images/2396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given rhombuses ABCD and ECGF with sides 4 and 6 respectively, and angle A being 120 degrees, what is the length of diagonal BD in rhombus ABCD?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.28125, "loss": 2.6699750423431396 }, { "id": "identity_328062", "image": "PMC-VQA/images/PMC7353793_Fig1_212030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the inflammatory lesion located on this chest CT scan? \nChoices:\n(A) Left upper lobe \n(B) Left lower lobe \n(C) Right upper lobe \n(D) Right lower lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.21875, "loss": 4.360049724578857 }, { "id": "identity_275796", "image": "DVQA/images/bar_train_00180834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the lowest total value when all values are added together?" }, { "from": "gpt", "value": "The answer is reply" } ], "clip_score": 22.703125, "loss": 3.252192258834839 }, { "id": "identity_93832", "image": "TabMWP/images/32858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many exhibits have an odd number of paintings?" }, { "from": "gpt", "value": "The answer is 10 exhibits" } ], "clip_score": 18.734375, "loss": 2.63205885887146 }, { "id": "identity_206550", "image": "A-OKVQA/images/48974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the person second from the right wearing?" }, { "from": "gpt", "value": "The answer is Converse" } ], "clip_score": 19.953125, "loss": 4.279082775115967 }, { "id": "identity_70171", "image": "TQA/images/question_images/dna_6767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What component forms the helix structure of the DNA?" }, { "from": "gpt", "value": "The answer is Sugar-phosphates" } ], "clip_score": 31.1875, "loss": 2.1227810382843018 }, { "id": "identity_219293", "image": "TextVQA/images/ef2984aa210590d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the book is this chapter from?" }, { "from": "gpt", "value": "The answer is Part III" } ], "clip_score": 20.453125, "loss": 2.969393491744995 }, { "id": "identity_107712", "image": "PMC-VQA/images/PMC3328163_fig5_134184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many CT images are displayed?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.921875, "loss": 4.1318559646606445 }, { "id": "identity_197243", "image": "A-OKVQA/images/262006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible outside of the cars?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.109375, "loss": 3.04280161857605 }, { "id": "identity_256164", "image": "VQA-AS/images/9105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs does the black grill have, and what is the combined number of legs of the grill and the people in the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 29.796875, "loss": 3.899627685546875 }, { "id": "identity_175167", "image": "IconQA/images/30742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each sleeping bag needs one pillow, how many additional pillows are needed?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 32.59375, "loss": 2.384202241897583 }, { "id": "identity_84319", "image": "UniGeo/images/2724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point E is the midpoint of side CD in rectangle ABCD, and the extension of AD and BE intersect at point F, with DF = 3 and DE = 2, what is the length of diagonal BD?" }, { "from": "gpt", "value": "The answer is 2 * sqrt(72)" } ], "clip_score": 33.40625, "loss": 2.5042455196380615 }, { "id": "identity_102385", "image": "TabMWP/images/13068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Mandy needs 2 kilograms of black olives for a party. How much will she spend?" }, { "from": "gpt", "value": "The answer is $4" } ], "clip_score": 30.390625, "loss": 3.073045492172241 }, { "id": "identity_64741", "image": "AI2D/images/4605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which fly has spotted wings?" }, { "from": "gpt", "value": "The answer is Syrphid fly" } ], "clip_score": 29.796875, "loss": 2.0477895736694336 }, { "id": "identity_240182", "image": "FigureQA/images/81437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest average value over the entire range of the x-axis?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 27.125, "loss": 3.043055772781372 }, { "id": "identity_77553", "image": "TQA/images/question_images/human_system_respiratory_6201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lobes does the right lung have?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.6875, "loss": 2.363013982772827 }, { "id": "identity_248371", "image": "DVQA/images/bar_train_00050386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the 'hotel' label across all stacks?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 25.453125, "loss": 2.34394907951355 }, { "id": "identity_191242", "image": "VQA2.0/images/397192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any signage visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is a sign on the window" } ], "clip_score": 16.8125, "loss": 2.3006250858306885 }, { "id": "identity_62081", "image": "DocVQA/images/ysmp0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the postal code for the location of the International Nutrition Planning Program?" }, { "from": "gpt", "value": "The answer is 02139" } ], "clip_score": 16.03125, "loss": 2.242046356201172 }, { "id": "identity_249159", "image": "DocVQA/images/xjjw0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of additions and reductions to the Balance Capital Equipment during 1982?" }, { "from": "gpt", "value": "The answer is $59,188" } ], "clip_score": 27.34375, "loss": 2.459242343902588 }, { "id": "identity_122939", "image": "A-OKVQA/images/545163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the road depicted in the image?" }, { "from": "gpt", "value": "The answer is Queen Elizabeth Hospital Road" } ], "clip_score": 24.671875, "loss": 2.343782424926758 }, { "id": "identity_75342", "image": "ScienceQA/images/7731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bond is represented in the image?" }, { "from": "gpt", "value": "The answer is Nonpolar covalent bond" } ], "clip_score": 20.765625, "loss": 2.63887095451355 }, { "id": "identity_69369", "image": "ScienceQA/images/14652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is depicted in the diagram?" }, { "from": "gpt", "value": "The answer is Diffusion" } ], "clip_score": 28.96875, "loss": 2.550194501876831 }, { "id": "identity_137222", "image": "DVQA/images/bar_train_00162776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value for the category \"mother\"?" }, { "from": "gpt", "value": "The answer is approximately 270" } ], "clip_score": 18.578125, "loss": 2.9368865489959717 }, { "id": "identity_307891", "image": "TextVQA/images/e6ab3f563bde9b2a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What month was this published?" }, { "from": "gpt", "value": "The answer is september" } ], "clip_score": 15.5078125, "loss": 3.6131224632263184 }, { "id": "identity_93572", "image": "TabMWP/images/37427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Emily need to buy a history book and a scrapbook?" }, { "from": "gpt", "value": "The answer is $30.00" } ], "clip_score": 34.71875, "loss": 1.7328917980194092 }, { "id": "identity_3423", "image": "VQA2.0/images/274012.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fallen leaves?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.078125, "loss": 3.262892484664917 }, { "id": "identity_173778", "image": "TabMWP/images/29650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of roses on all bushes?" }, { "from": "gpt", "value": "The answer is 73" } ], "clip_score": 25.015625, "loss": 2.638629674911499 }, { "id": "identity_155922", "image": "ScienceQA/images/1568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total volume of liquid in the three flasks?" }, { "from": "gpt", "value": "The answer is 180 ml" } ], "clip_score": 29.5625, "loss": 1.6273998022079468 }, { "id": "identity_171039", "image": "TabMWP/images/11955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which event has the shortest duration?" }, { "from": "gpt", "value": "The answer is men's tennis match" } ], "clip_score": 29.921875, "loss": 2.5726895332336426 }, { "id": "identity_176299", "image": "IconQA/images/12208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are both orange and square?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.453125, "loss": 2.3955414295196533 }, { "id": "identity_261488", "image": "PMC-VQA/images/PMC8537352_insects-12-00850-f002_92118.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed inside the gall?\nChoices:\n(A) Unknown spore and veins \n(B) Radial files of epidermal cells and silk thread \n(C) Stomata-like opening and unknown spore \n(D) Unknown spore and caterpillar" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.375, "loss": 4.308254718780518 }, { "id": "identity_234212", "image": "ChartQA/images/multi_col_80478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average annual increase in debt per borrower from 2003/04 to 2018/19?" }, { "from": "gpt", "value": "The answer is $280" } ], "clip_score": 26.890625, "loss": 2.619657039642334 }, { "id": "identity_267365", "image": "ChartQA/images/7872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine the ratio of Scientists to small-farm owners." }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.359375, "loss": 2.7894184589385986 }, { "id": "identity_336405", "image": "CLEVR-Math/images/CLEVR_train_036099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all cylindrical shapes. How many items remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 27.125, "loss": 2.4656126499176025 }, { "id": "identity_308650", "image": "FigureQA/images/86347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.3515625, "loss": 3.859253406524658 }, { "id": "identity_287797", "image": "IconQA/images/100490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the line in centimeters when measured to the nearest centimeter using the ruler?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 33.0625, "loss": 3.0089893341064453 }, { "id": "identity_27722", "image": "TextVQA/images/5ccfd69e1490c595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what two letters are on the tail of the silver plane?" }, { "from": "gpt", "value": "The answer is njd" } ], "clip_score": 13.4921875, "loss": 4.758558750152588 }, { "id": "identity_163942", "image": "UniGeo/images/3356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if angle ABO is 55 degrees, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 125 degrees" } ], "clip_score": 34.09375, "loss": 1.7915505170822144 }, { "id": "identity_232727", "image": "Geometry3K/images/1376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that the diameter of \\(\\odot B\\) is 20 units, \\(m \\angle A C E = 45^\\circ\\), and \\(BD = 5x\\), find the length of \\(DE\\)" }, { "from": "gpt", "value": "The answer is \\(5\\sqrt{2}\\)" } ], "clip_score": 34.15625, "loss": 2.253253698348999 }, { "id": "identity_235903", "image": "A-OKVQA/images/26506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visual indicators in the image that suggest the animals found water on a beach, and how do these indicators differentiate this location from a lake, pool, or well?" }, { "from": "gpt", "value": "The answer is Visual indicators include sandy shoreline, vast water horizon, and beach elements" } ], "clip_score": 32.0625, "loss": 3.4833908081054688 }, { "id": "identity_19156", "image": "GeoQA+/images/3167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB=AC,AD=AE,∠BAC=∠DAE,∠1=25°,∠2=30°,连接BE,点D恰好在BE上,则∠3=()\nChoices:\n(A) 60°\n(B) 55°\n(C) 50°\n(D) 0°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.03125, "loss": 2.8403563499450684 }, { "id": "identity_43375", "image": "MapQA/images/map_8234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for the state of New York?" }, { "from": "gpt", "value": "The answer is 8-10" } ], "clip_score": 27.84375, "loss": 2.155315399169922 }, { "id": "identity_314640", "image": "PlotQA/images/138568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where were the global emissions of Sulfur hexafluoride highest in 2005?" }, { "from": "gpt", "value": "The answer is India" } ], "clip_score": 20.953125, "loss": 3.406874418258667 }, { "id": "identity_136101", "image": "FigureQA/images/74007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Medium Blue greater than 75 at x-axis 100?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.4375, "loss": 3.1577236652374268 }, { "id": "identity_6957", "image": "VQA2.0/images/66691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like?" }, { "from": "gpt", "value": "The answer is sunny" } ], "clip_score": 13.640625, "loss": 2.475970506668091 }, { "id": "identity_287707", "image": "IconQA/images/53715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture corresponds to the ninth image in the sequence, given that the first picture is a dog? \nChoices:\n(A) dog\n(B) house\n(C) paw" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.296875, "loss": 4.351259231567383 }, { "id": "identity_249965", "image": "TabMWP/images/703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of red gumdrops in all the bags that have at least 25 but less than 53 red gumdrops?" }, { "from": "gpt", "value": "The answer is 593" } ], "clip_score": 35.75, "loss": 2.4263768196105957 }, { "id": "identity_294118", "image": "ScienceQA/images/7116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object? \nChoices:\n(A) bouncy\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.359375, "loss": 3.984401226043701 }, { "id": "identity_207145", "image": "A-OKVQA/images/186018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the water?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 16.359375, "loss": 2.4030706882476807 }, { "id": "identity_322169", "image": "A-OKVQA/images/572095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What American state might this place be in?\nChoices:\n(A) milwaukee\n(B) deleware\n(C) new york\n(D) illinois" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.734375, "loss": 3.554943799972534 }, { "id": "identity_86243", "image": "UniGeo/images/2512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠AOB = 70°, what is the measure of ∠ABC?" }, { "from": "gpt", "value": "The answer is 35°" } ], "clip_score": 35.4375, "loss": 1.9356576204299927 }, { "id": "identity_69461", "image": "ScienceQA/images/999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many secondary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.0, "loss": 4.823992729187012 }, { "id": "identity_61786", "image": "DocVQA/images/nrdd0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there an accompanying report mentioned in the document?" }, { "from": "gpt", "value": "The answer is Yes, there is an accompanying report sent to the Food and Drug Administration" } ], "clip_score": 27.296875, "loss": 1.3867887258529663 }, { "id": "identity_107818", "image": "TextVQA/images/689df70804031e18.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What social media platforms does McKinney have a presence on according to the image?" }, { "from": "gpt", "value": "The answer is Twitter, Facebook, Flickr, YouTube" } ], "clip_score": 12.2421875, "loss": 1.8701021671295166 }, { "id": "identity_298150", "image": "TabMWP/images/16587.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Mariana have sufficient funds, amounting to $118.50, to purchase both a chest of drawers and a set of bunk beds?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.8125, "loss": 3.9222664833068848 }, { "id": "identity_296225", "image": "VQA2.0/images/275348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which website is displayed on the green signs?" }, { "from": "gpt", "value": "The answer is bet365com" } ], "clip_score": 22.59375, "loss": 1.6701141595840454 }, { "id": "identity_275075", "image": "IconQA/images/104175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the first picture is a moon, which picture corresponds to the eighth position in the sequence? \nChoices:\n(A) star\n(B) moon\n(C) rocket" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.515625, "loss": 3.717806339263916 }, { "id": "identity_144369", "image": "DocVQA/images/fshk0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are listed with a specific check-out date?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.75, "loss": 2.538151502609253 }, { "id": "identity_1524", "image": "Super-CLEVR/images/superCLEVR_new_000547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other things that have the same color as the tiny rubber bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.3125, "loss": 5.142563819885254 }, { "id": "identity_19509", "image": "FigureQA/images/34870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orange the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.46875, "loss": 4.5606489181518555 }, { "id": "identity_293847", "image": "TabMWP/images/33741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children at the summer camp made a minimum of 2 candy necklaces according to Paula's monitoring?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 36.65625, "loss": 2.324772596359253 }, { "id": "identity_30152", "image": "DocVQA/images/jzbn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the heading of the letter?" }, { "from": "gpt", "value": "The answer is American Chemistry Council" } ], "clip_score": 24.546875, "loss": 2.696106433868408 }, { "id": "identity_61774", "image": "DocVQA/images/hycv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is the \"Tea\" break scheduled for?" }, { "from": "gpt", "value": "The answer is 14:10 – 14:25" } ], "clip_score": 14.53125, "loss": 2.002445697784424 }, { "id": "identity_311350", "image": "VizWiz/images/VizWiz_train_00009966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this item?" }, { "from": "gpt", "value": "The answer is cup" } ], "clip_score": 29.078125, "loss": 3.3194668292999268 }, { "id": "identity_2478", "image": "DVQA/images/bar_train_00138906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is firm" } ], "clip_score": 22.578125, "loss": 4.078261375427246 }, { "id": "identity_27980", "image": "Super-CLEVR/images/superCLEVR_new_000195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer rubber tandem bikes than small cars?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.96875, "loss": 5.2229461669921875 }, { "id": "identity_224898", "image": "A-OKVQA/images/128110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the weather conditions indicated by the sign, what are the potential impacts on traffic and pedestrian movement in this area?" }, { "from": "gpt", "value": "The answer is Increased traffic congestion and pedestrian safety risks" } ], "clip_score": 27.21875, "loss": 2.2358503341674805 }, { "id": "identity_281135", "image": "ScienceQA/images/10632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Kentucky?\nChoices:\n(A) Columbia\n(B) Frankfort\n(C) Bismarck\n(D) Denver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.28125, "loss": 4.259941101074219 }, { "id": "identity_249201", "image": "GeoQA+/images/2967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC内接于⊙O,∠C=30°,AB=2,若⊙O的半径为r,求△OAB的面积。" }, { "from": "gpt", "value": "The answer is √3" } ], "clip_score": 31.09375, "loss": 3.3082916736602783 }, { "id": "identity_159914", "image": "GeoQA+/images/806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, if AC = 6 and CG = 4, what is the measure of ∠ACG?" }, { "from": "gpt", "value": "The answer is ∠ACG = 90°" } ], "clip_score": 34.09375, "loss": 1.3812817335128784 }, { "id": "identity_69444", "image": "AI2D/images/1895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of food for the tarantula in this food web?" }, { "from": "gpt", "value": "The answer is Insects" } ], "clip_score": 30.03125, "loss": 2.146679401397705 }, { "id": "identity_183840", "image": "A-OKVQA/images/16964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vegetation visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 11.109375, "loss": 2.9849822521209717 }, { "id": "identity_338566", "image": "GeoQA+/images/839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是半圆的直径,CD为半圆的弦,且CD∥AB,∠ACD=26°,则∠B等于多少?\nChoices:\n(A) 26°\n(B) 36°\n(C) 64°\n(D) 74°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.53125, "loss": 2.3813724517822266 }, { "id": "identity_168126", "image": "TabMWP/images/23710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of fish in all the tanks combined?" }, { "from": "gpt", "value": "The answer is 270" } ], "clip_score": 28.03125, "loss": 2.245269298553467 }, { "id": "identity_236367", "image": "DVQA/images/bar_train_00127369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different patterns are used in the bars, and which datasets do they correspond to?" }, { "from": "gpt", "value": "The answer is Four patterns: rain, dozen, image, earth" } ], "clip_score": 28.078125, "loss": 3.369494676589966 }, { "id": "identity_313723", "image": "Geometry3K/images/249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Each triangle in the figure is a 30 - 60 - 90 triangle. Determine the value of x.\nChoices:\n(A) 2\n(B) 2.25\n(C) 3.5\n(D) 4.5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.5, "loss": 2.3872764110565186 }, { "id": "identity_254087", "image": "PMC-VQA/images/PMC5352873_fig1_140562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the tumor in square millimeters?" }, { "from": "gpt", "value": "The answer is approximately 235.62 square millimeters" } ], "clip_score": 29.234375, "loss": 1.9548397064208984 }, { "id": "identity_307830", "image": "FigureQA/images/5304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Deep Pink?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.171875, "loss": 4.009838104248047 }, { "id": "identity_255442", "image": "PMC-VQA/images/PMC6720598_sensors-19-03526-f007_31811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences between the two camera images shown in the pair, and how do these differences help identify the specific model of the motorcycle engine shown in the image?" }, { "from": "gpt", "value": "The answer is The key differences help identify the Yamaha YIC engine model" } ], "clip_score": 30.125, "loss": 4.03296422958374 }, { "id": "identity_159843", "image": "GeoQA+/images/2434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In quadrilateral ABCD, if ∠DAB = 90°, ∠DCB = 90°, and the lengths of AC and BD are given as 6 and 10 respectively, what is the length of diagonal DB?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.0625, "loss": 3.0968997478485107 }, { "id": "identity_290699", "image": "PMC-VQA/images/PMC7412277_materials-13-03186-f001_230475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what state was the alloy microstructure deformed as shown in the image? \nOptions:\n(A) Solid-solution\n(B) Annealed\n(C) Aged\n(D) Cold-worked" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.515625, "loss": 4.498970985412598 }, { "id": "identity_108094", "image": "Super-CLEVR/images/superCLEVR_new_001641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any objects in the image that resemble an aircraft?" }, { "from": "gpt", "value": "The answer is Yes, one purple aircraft" } ], "clip_score": 23.359375, "loss": 4.207098484039307 }, { "id": "identity_13848", "image": "DVQA/images/bar_train_00024710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are liked by less than 8 people?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 29.359375, "loss": 3.7969284057617188 }, { "id": "identity_163937", "image": "GeoQA+/images/3004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, if angle ABD is 20 degrees, what is the perimeter of the rhombus if AB is 10 units?" }, { "from": "gpt", "value": "The answer is 40 units" } ], "clip_score": 32.375, "loss": 1.4876281023025513 }, { "id": "identity_206252", "image": "PMC-VQA/images/PMC7815743_Fig3_345489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible abnormalities in the bladder?" }, { "from": "gpt", "value": "The answer is No visible abnormalities" } ], "clip_score": 25.5, "loss": 3.047015428543091 }, { "id": "identity_198398", "image": "PMC-VQA/images/PMC6945254_f1_94970.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the ultrasound image?" }, { "from": "gpt", "value": "The answer is Transverse orientation" } ], "clip_score": 32.46875, "loss": 3.509613037109375 }, { "id": "identity_238517", "image": "VQA2.0/images/192994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the collar, and what is the dog lying on?" }, { "from": "gpt", "value": "The answer is The collar is red, and the dog is lying on a blue cushion" } ], "clip_score": 27.796875, "loss": 1.5119825601577759 }, { "id": "identity_205700", "image": "PMC-VQA/images/PMC6365311_Fig13_432977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this MRI image show any signs of brain atrophy?" }, { "from": "gpt", "value": "The answer is No signs of brain atrophy" } ], "clip_score": 34.6875, "loss": 2.5234687328338623 }, { "id": "identity_337693", "image": "VQA2.0/images/99498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this device?" }, { "from": "gpt", "value": "The answer is blackberry" } ], "clip_score": 29.71875, "loss": 3.201237916946411 }, { "id": "identity_85910", "image": "UniGeo/images/1983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle ACD is 25 degrees, what is the measure of angle CAD?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.46875, "loss": 1.938522458076477 }, { "id": "identity_225621", "image": "TQA/images/question_images/human_system_eye_6139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the pathway of light as it enters the eye and reaches the retina, including all the structures it passes through in order" }, { "from": "gpt", "value": "The answer is Cornea, aqueous humor, pupil, lens, vitreous humor, retina" } ], "clip_score": 31.859375, "loss": 1.1639533042907715 }, { "id": "identity_103478", "image": "A-OKVQA/images/180787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of meat is being prepared?" }, { "from": "gpt", "value": "The answer is Likely a leg of lamb or pork" } ], "clip_score": 24.96875, "loss": 2.7363829612731934 }, { "id": "identity_98526", "image": "TabMWP/images/2870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fewer riders were there in April compared to February?" }, { "from": "gpt", "value": "The answer is 4,893" } ], "clip_score": 32.75, "loss": 2.320528030395508 }, { "id": "identity_337225", "image": "IconQA/images/2508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first one is a turtle. What is the eighth one?\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.34375, "loss": 3.9038772583007812 }, { "id": "identity_83223", "image": "Geometry3K/images/177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 34.15625, "loss": 2.0420727729797363 }, { "id": "identity_53446", "image": "FigureQA/images/85577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in y-axis value between Coral and Crimson at x-axis value 100?" }, { "from": "gpt", "value": "The answer is Approximately 2" } ], "clip_score": 27.984375, "loss": 3.1035566329956055 }, { "id": "identity_99485", "image": "TabMWP/images/36504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest y values?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.515625, "loss": 2.098607063293457 }, { "id": "identity_278507", "image": "PMC-VQA/images/PMC4244565_Fig2_339256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was revealed by the X-ray assessment?\nChoices:\n(A) Poor healing of the fracture \n(B) Moderate healing of the fracture \n(C) Good healing of the fracture \n(D) Complete healing of the fracture" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.6875, "loss": 4.844237327575684 }, { "id": "identity_92120", "image": "IconQA/images/99941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cushions are there on the couch?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.375, "loss": 2.2218925952911377 }, { "id": "identity_163275", "image": "GeoQA+/images/4047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知点A,B,C依次在⊙O上,∠BOC的度数为120°,则∠BAC的度数为()" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 31.671875, "loss": 1.5150482654571533 }, { "id": "identity_118603", "image": "TextVQA/images/74d6f3f3e74ab89c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Admiral mentioned in the text on the right side of the comic cover?" }, { "from": "gpt", "value": "The answer is Rear Admiral Ben Moreell" } ], "clip_score": 29.484375, "loss": 2.3468246459960938 }, { "id": "identity_168394", "image": "TabMWP/images/31796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of ties per rack?" }, { "from": "gpt", "value": "The answer is 27.5 ties" } ], "clip_score": 31.796875, "loss": 2.400057315826416 }, { "id": "identity_212584", "image": "PMC-VQA/images/PMC3861471_f1-etm-07-01-0199_251398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any sign of meniscal tear on the preoperative MRI scan?" }, { "from": "gpt", "value": "The answer is No meniscal tear is present" } ], "clip_score": 31.40625, "loss": 2.3802525997161865 }, { "id": "identity_201962", "image": "A-OKVQA/images/136458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pairs of eyeglasses are present in the image?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 16.140625, "loss": 2.467801094055176 }, { "id": "identity_54946", "image": "DocVQA/images/fhvg0065_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Lot Number of the Acetone?" }, { "from": "gpt", "value": "The answer is 973293" } ], "clip_score": 15.5625, "loss": 2.730316638946533 }, { "id": "identity_78339", "image": "TQA/images/abc_question_images/solar_system_16294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the planet known for its prominent rings?" }, { "from": "gpt", "value": "The answer is V" } ], "clip_score": 27.4375, "loss": 2.7182695865631104 }, { "id": "identity_193438", "image": "VQA2.0/images/42876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of landing gear does the plane have?" }, { "from": "gpt", "value": "The answer is Conventional landing gear" } ], "clip_score": 25.515625, "loss": 2.7690041065216064 }, { "id": "identity_284260", "image": "TextVQA/images/3ff51c29c8072953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of individuals in the squad?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 14.4453125, "loss": 1.842865228652954 }, { "id": "identity_185762", "image": "TextVQA/images/c45c47d81053869d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What team is the player wearing number 16 representing?" }, { "from": "gpt", "value": "The answer is Cincinnati Reds" } ], "clip_score": 31.25, "loss": 1.4885189533233643 }, { "id": "identity_328605", "image": "VQA2.0/images/162578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the area filled with plants?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 7.35546875, "loss": 4.120665550231934 }, { "id": "identity_203775", "image": "PMC-VQA/images/PMC6920276_Fig5_86499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any organs that are adjacent to the fistula?" }, { "from": "gpt", "value": "The answer is Bladder and rectum" } ], "clip_score": 24.9375, "loss": 2.706167697906494 }, { "id": "identity_201171", "image": "A-OKVQA/images/21079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tablecloth?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 16.375, "loss": 3.199921131134033 }, { "id": "identity_236020", "image": "TQA/images/question_images/waves_interactions_interference_8232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the resultant amplitude when two waves with amplitudes of 3 units and 4 units undergo destructive interference?" }, { "from": "gpt", "value": "The answer is 1 unit" } ], "clip_score": 29.453125, "loss": 1.530112624168396 }, { "id": "identity_250463", "image": "TextVQA/images/265ef87e5c269ff5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the score difference between the two teams and which team is leading?" }, { "from": "gpt", "value": "The answer is 1, New Jersey Devils" } ], "clip_score": 25.5, "loss": 2.8548154830932617 }, { "id": "identity_131857", "image": "VQA-AS/images/14289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the sitting person's pants?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.484375, "loss": 3.656198501586914 }, { "id": "identity_242836", "image": "DocVQA/images/qngh0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the dates for all individuals who signed on 4/23/01?" }, { "from": "gpt", "value": "The answer is Stephen Jakob, D. E. Wilhelm, W. E. Routh, Jr., Alan Caldwell" } ], "clip_score": 24.15625, "loss": 3.4314732551574707 }, { "id": "identity_187868", "image": "A-OKVQA/images/160848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.15625, "loss": 4.239352226257324 }, { "id": "identity_96693", "image": "TabMWP/images/4510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of y when x = 18?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.75, "loss": 2.457746744155884 }, { "id": "identity_211338", "image": "VQA2.0/images/255983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 16.734375, "loss": 4.507629871368408 }, { "id": "identity_13578", "image": "AI2D/images/2109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given the food web shown, which of the following is considered a Producer?\nChoices:\n(A) Pronghorn Antelope\n(B) Coyote\n(C) Green Plants\n(D) American Bison" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.34375, "loss": 3.676107883453369 }, { "id": "identity_323675", "image": "A-OKVQA/images/197918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the design called on this shirt?\nChoices:\n(A) polka dot\n(B) striped\n(C) flannel\n(D) plaid" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.890625, "loss": 4.37096643447876 }, { "id": "identity_210556", "image": "A-OKVQA/images/435579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more people standing or sitting in the background?" }, { "from": "gpt", "value": "The answer is Standing" } ], "clip_score": 13.65625, "loss": 2.5836565494537354 }, { "id": "identity_22870", "image": "TabMWP/images/11719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much would it cost to buy 2.9 kilograms of pine nuts?" }, { "from": "gpt", "value": "The answer is 8.70" } ], "clip_score": 30.375, "loss": 2.172194480895996 }, { "id": "identity_185257", "image": "PMC-VQA/images/PMC5572699_Fig10_207363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color used to stain the structures in this image?" }, { "from": "gpt", "value": "The answer is The predominant color is red" } ], "clip_score": 23.296875, "loss": 2.578050374984741 }, { "id": "identity_9641", "image": "TabMWP/images/30202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A veterinarian weighed all the dogs that visited her office last month. How many dogs weighed at least 54 pounds?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 32.46875, "loss": 1.9250746965408325 }, { "id": "identity_336114", "image": "AI2D/images/1955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do they feed on?\n\nChoices:\n(A) Microzooplankton\n(B) Phytoplankton\n(C) None of the above\n(D) Both of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.5, "loss": 3.8972251415252686 }, { "id": "identity_43814", "image": "MapQA/images/map_10101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does New York have a lower value than the average value of the USA?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.8125, "loss": 2.8793139457702637 }, { "id": "identity_140189", "image": "DVQA/images/bar_train_00105203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the bars labeled \"Charm\" and \"Clock\"?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 23.03125, "loss": 2.2114992141723633 }, { "id": "identity_90887", "image": "Geometry3K/images/1113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is x = 5.8" } ], "clip_score": 36.71875, "loss": 2.1166141033172607 }, { "id": "identity_212012", "image": "A-OKVQA/images/383212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many riders are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.109375, "loss": 2.421905279159546 }, { "id": "identity_91757", "image": "TabMWP/images/11068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what price does the quantity demanded equal the quantity supplied?" }, { "from": "gpt", "value": "The answer is $305" } ], "clip_score": 33.1875, "loss": 2.2750673294067383 }, { "id": "identity_313239", "image": "MapQA/images/map_8514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of this state?" }, { "from": "gpt", "value": "The answer is 377-419" } ], "clip_score": 28.40625, "loss": 2.219531297683716 }, { "id": "identity_272713", "image": "ChartQA/images/1627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bar representing 26% in the TOTAL category?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 12.5390625, "loss": 3.6033530235290527 }, { "id": "identity_133657", "image": "TextVQA/images/27e65a3e7c92352a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color scheme is used in the coffee preference survey on the right?" }, { "from": "gpt", "value": "The answer is Blue and white" } ], "clip_score": 26.109375, "loss": 3.0484960079193115 }, { "id": "identity_127741", "image": "VizWiz/images/VizWiz_train_00005525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the logo?" }, { "from": "gpt", "value": "The answer is Crest-like with wings" } ], "clip_score": 23.984375, "loss": 3.663451671600342 }, { "id": "identity_333532", "image": "AI2D/images/4902.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which region in the diagram represents this?\nChoices:\n(A) d\n(B) a\n(C) c\n(D) b" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.3515625, "loss": 3.6449618339538574 }, { "id": "identity_116410", "image": "TextVQA/images/eed0e2f40cc4cfc6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sky clear or cloudy in the image?" }, { "from": "gpt", "value": "The answer is Clear" } ], "clip_score": 16.3125, "loss": 3.6331489086151123 }, { "id": "identity_88595", "image": "GeoQA+/images/2884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given the cone's lateral surface is a sector with a radius of 4 and a central angle of 90°, what is the total surface area of the cone?" }, { "from": "gpt", "value": "The answer is 33π/4" } ], "clip_score": 29.90625, "loss": 2.220668315887451 }, { "id": "identity_69252", "image": "AI2D/images/2114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the decomposers in the food web" }, { "from": "gpt", "value": "The answer is Bacteria and Fungi" } ], "clip_score": 23.359375, "loss": 1.4596260786056519 }, { "id": "identity_133026", "image": "Super-CLEVR/images/superCLEVR_new_000843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the motorcycle located at the bottom left of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.71875, "loss": 3.432213544845581 }, { "id": "identity_154321", "image": "ScienceQA/images/6493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are both objects predominantly?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 24.140625, "loss": 2.2800769805908203 }, { "id": "identity_99638", "image": "CLEVR-Math/images/CLEVR_train_058771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither green nor cylinders?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.734375, "loss": 2.5595595836639404 }, { "id": "identity_48937", "image": "PlotQA/images/103833.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did the net enrollment rate in primary schools of Least developed countries reach its peak?" }, { "from": "gpt", "value": "The answer is The peak is around 2018" } ], "clip_score": 32.96875, "loss": 2.6541364192962646 }, { "id": "identity_104041", "image": "PMC-VQA/images/PMC4236490_Fig2_337300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the abnormality indicated by the arrow?" }, { "from": "gpt", "value": "The answer is Oval" } ], "clip_score": 21.09375, "loss": 3.0105702877044678 }, { "id": "identity_21473", "image": "TextVQA/images/f4c0fbb4f8bfa61a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: where is sagna in this picture?" }, { "from": "gpt", "value": "The answer is bottom right" } ], "clip_score": 25.515625, "loss": 3.1675360202789307 }, { "id": "identity_18072", "image": "DocVQA/images/rtxm0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the chairman mentioned ?" }, { "from": "gpt", "value": "The answer is Dr. salah Abu Shakra" } ], "clip_score": 26.796875, "loss": 3.036768674850464 }, { "id": "identity_207962", "image": "A-OKVQA/images/287105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is standing in the doorway?" }, { "from": "gpt", "value": "The answer is Giraffe" } ], "clip_score": 28.703125, "loss": 2.0492923259735107 }, { "id": "identity_317270", "image": "VizWiz/images/VizWiz_train_00005316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you describe this image including any text?" }, { "from": "gpt", "value": "The answer is flyer for matial arts classes" } ], "clip_score": 18.671875, "loss": 4.237438678741455 }, { "id": "identity_333024", "image": "GeoQA+/images/1248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,用半径为10,圆心角为144°的扇形纸片围成一个圆锥(接缝处忽略不计),则该圆锥的底面半径是多少?\n选项:\n(A) 3\n(B) 4\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.21875, "loss": 2.0963191986083984 }, { "id": "identity_60392", "image": "DocVQA/images/jkbb0228_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest mortality rate for all causes around 1900?" }, { "from": "gpt", "value": "The answer is approximately 2000 deaths per 100,000 population" } ], "clip_score": 26.78125, "loss": 1.1372147798538208 }, { "id": "identity_12101", "image": "UniGeo/images/1840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点C是线段AB上一点,点P是AC的中点,点Q是BC的中点,已知线段AC=8cm,线段BC=4cm,则线段PQ为()\nChoices:\n(A) 2cm\n(B) 4cm\n(C) 6cm\n(D) 12cm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.09375, "loss": 2.4400532245635986 }, { "id": "identity_166391", "image": "TabMWP/images/18673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of games won by the Brookfield baseball team over the years shown in the table?" }, { "from": "gpt", "value": "The answer is 116" } ], "clip_score": 43.96875, "loss": 2.2813894748687744 }, { "id": "identity_3188", "image": "ScienceQA/images/5038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Cara's test show?\nChoices:\n(A) whether producing more insulin would help the bacteria grow faster\n(B) whether she added enough nutrients to help the bacteria produce 20% more insulin\n(C) whether different types of bacteria would need different nutrients to produce insulin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.8125, "loss": 3.539140224456787 }, { "id": "identity_290809", "image": "ChartQA/images/3770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of individuals think that Germany is currently playing a more significant role in global affairs compared to a decade ago?" }, { "from": "gpt", "value": "The answer is 47" } ], "clip_score": 34.59375, "loss": 2.7975573539733887 }, { "id": "identity_313190", "image": "A-OKVQA/images/553130.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the safest way to slowly enter the water here?\nChoices:\n(A) rolling headfirst\n(B) dive headfirst\n(C) belly flop\n(D) ladder" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.2109375, "loss": 4.168666362762451 }, { "id": "identity_105443", "image": "TextVQA/images/5129f8041aa92388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word in the section titled \"Descriptio chaste virginis\"?" }, { "from": "gpt", "value": "The answer is pay" } ], "clip_score": 16.703125, "loss": 5.629376411437988 }, { "id": "identity_310411", "image": "ScienceQA/images/17623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body of water is highlighted?\nChoices:\n(A) the Indian Ocean\n(B) the Arctic Ocean\n(C) the Atlantic Ocean\n(D) the Southern Ocean" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.3125, "loss": 3.7213215827941895 }, { "id": "identity_7071", "image": "Super-CLEVR/images/superCLEVR_new_001204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the small red rubber object?" }, { "from": "gpt", "value": "The answer is road bike" } ], "clip_score": 21.828125, "loss": 3.6598262786865234 }, { "id": "identity_205909", "image": "PMC-VQA/images/PMC7500744_FIG1_256789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow arrows are pointing to features in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.203125, "loss": 2.412153959274292 }, { "id": "identity_262482", "image": "VQA-RAD/images/synpic25105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the bowel contain air?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.453125, "loss": 3.7955336570739746 }, { "id": "identity_232077", "image": "FigureQA/images/21461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the area of Dark Orange to the area of Medium Orchid in the pie chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 40.1875, "loss": 2.606241464614868 }, { "id": "identity_211838", "image": "VQA2.0/images/60378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other fruits besides bananas visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, pineapples and watermelons" } ], "clip_score": 26.453125, "loss": 2.4677693843841553 }, { "id": "identity_132076", "image": "VQA-AS/images/7110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the rug on the floor?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.015625, "loss": 3.4651150703430176 }, { "id": "identity_46443", "image": "DVQA/images/bar_train_00045256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented by patterned bars?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.890625, "loss": 2.66276478767395 }, { "id": "identity_321984", "image": "PMC-VQA/images/PMC8405323_fig2_48743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in these images?\nChoices:\n(A) The original surgical site\n(B) Dislodged screws\n(C) Postoperative changes after anterior fusion\n(D) Postoperative changes after posterior fusion" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.5625, "loss": 4.451476097106934 }, { "id": "identity_10133", "image": "AI2D/images/984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: One of the separate, usually green parts forming the calyx of a flower is known as?\nChoices:\n(A) rotate\n(B) sepal\n(C) salverform\n(D) spurred" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5, "loss": 3.8094379901885986 }, { "id": "identity_235467", "image": "A-OKVQA/images/252968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key features in the image that indicate the train is powered by electricity, and how do these features differentiate it from trains powered by steam, battery, or gas?" }, { "from": "gpt", "value": "The answer is Overhead wires and pantograph indicate electric power" } ], "clip_score": 23.96875, "loss": 3.8021886348724365 }, { "id": "identity_313779", "image": "PMC-VQA/images/PMC8609801_Fig1_117453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the three positions included in this movement?\nChoices:\n(A) Thoracic extension, lumbar extension, hip extension.\n(B) Thoracic flexion, lumbar extension, hip flexion.\n(C) Thoracic flexion, lumbar flexion, hip extension.\n(D) Thoracic extension, lumbar flexion, hip flexion." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.796875, "loss": 3.6087329387664795 }, { "id": "identity_209", "image": "A-OKVQA/images/59361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animals can be seen?\nChoices:\n(A) cows\n(B) antelopes\n(C) giraffes\n(D) horses" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.015625, "loss": 3.999990463256836 }, { "id": "identity_142657", "image": "ChartQA/images/91568440017243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of the data presented in the image?" }, { "from": "gpt", "value": "The answer is J. Doyne Farmer and François Lafond (2016)" } ], "clip_score": 13.65625, "loss": 1.3929983377456665 }, { "id": "identity_61734", "image": "MapQA/images/map_7038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of New York?" }, { "from": "gpt", "value": "The answer is 3957917-5616284" } ], "clip_score": 23.125, "loss": 2.5914177894592285 }, { "id": "identity_234235", "image": "PMC-VQA/images/PMC4328627_F1_358320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the visibility of the MI region in both SSFP and LGE imaging techniques for both Event+ and Event- cases. Which imaging technique provides better contrast for the MI region in Event+ cases?" }, { "from": "gpt", "value": "The answer is SSFP imaging" } ], "clip_score": 35.0, "loss": 2.7805070877075195 }, { "id": "identity_325758", "image": "GeoQA+/images/2452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB∥CD∥EF,且AO=OD=DF,OE=6,则BE是多少?\nChoices:\n(A) 9\n(B) 10\n(C) 11\n(D) 12" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.125, "loss": 2.3101532459259033 }, { "id": "identity_118482", "image": "PMC-VQA/images/PMC8961915_Fig3_237822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique was used to create this image?" }, { "from": "gpt", "value": "The answer is CT scan or MRI" } ], "clip_score": 29.1875, "loss": 2.6737546920776367 }, { "id": "identity_166951", "image": "IconQA/images/100792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of pink to non-pink segments?" }, { "from": "gpt", "value": "The answer is 8:4 or 2:1 simplified" } ], "clip_score": 36.5625, "loss": 2.407672166824341 }, { "id": "identity_136152", "image": "DVQA/images/bar_train_00118962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy value of the 'hay' dataset for the 'folk' algorithm?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.578125, "loss": 2.9835636615753174 }, { "id": "identity_246297", "image": "VQA2.0/images/252022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of the streets shown in the photo, and what is the year mentioned on the stop sign?" }, { "from": "gpt", "value": "The answer is Gore Ave, Union St, 2006" } ], "clip_score": 30.859375, "loss": 1.9798294305801392 }, { "id": "identity_147325", "image": "MapQA/images/map_10072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented in the legend?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 15.9765625, "loss": 4.401217460632324 }, { "id": "identity_212919", "image": "VizWiz/images/VizWiz_train_00015983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the version number of the Resource CD?" }, { "from": "gpt", "value": "The answer is 2.0" } ], "clip_score": 20.34375, "loss": 2.360037326812744 }, { "id": "identity_239566", "image": "Geometry3K/images/1849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the measure of \\(\\angle C\\)" }, { "from": "gpt", "value": "The answer is 127" } ], "clip_score": 29.5625, "loss": 3.6598622798919678 }, { "id": "identity_295088", "image": "VQA2.0/images/380474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it not necessary for him to lose some weight?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 11.875, "loss": 2.686418056488037 }, { "id": "identity_139201", "image": "ChartQA/images/multi_col_20032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which vehicle type had the highest average age in 2017?" }, { "from": "gpt", "value": "The answer is Trucks" } ], "clip_score": 26.71875, "loss": 2.7621078491210938 }, { "id": "identity_40078", "image": "FigureQA/images/13344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have equal sections for each color?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 32.34375, "loss": 3.317902088165283 }, { "id": "identity_301360", "image": "UniGeo/images/3363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示, A、B、C是⊙O上的三点,若∠C=40°,则∠AOB的度数是多少?\nChoices:\n(A) 40°\n(B) 50°\n(C) 55°\n(D) 80°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.53125, "loss": 2.809840440750122 }, { "id": "identity_176404", "image": "IconQA/images/20693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the object have a circular part?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 16.203125, "loss": 3.4737389087677 }, { "id": "identity_15213", "image": "TQA/images/question_images/seasons_2757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Equinox can occur on the following date:\nChoices:\n(A) June 21\n(B) December 21\n(C) June 20\n(D) 20-Mar" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.234375, "loss": 3.836517333984375 }, { "id": "identity_136965", "image": "DVQA/images/bar_train_00043858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values represented on the y-axis?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.34375, "loss": 2.4252777099609375 }, { "id": "identity_306634", "image": "ScienceQA/images/328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Baton Rouge\n(B) Richmond\n(C) Arlington\n(D) Norfolk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.765625, "loss": 4.10776948928833 }, { "id": "identity_131699", "image": "Super-CLEVR/images/superCLEVR_new_000082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two wheels?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.421875, "loss": 2.4814364910125732 }, { "id": "identity_20880", "image": "DocVQA/images/qhjl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Agenda number 3?" }, { "from": "gpt", "value": "The answer is American's advertisement." } ], "clip_score": 23.140625, "loss": 2.7074623107910156 }, { "id": "identity_338157", "image": "AI2D/images/2664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the phase of the moon occurring when it is between the earth and the sun and is not visible or only visible as a thin crescent at sunset?\nChoices:\n(A) I\n(B) D\n(C) F\n(D) B" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.828125, "loss": 3.684291124343872 }, { "id": "identity_316742", "image": "ScienceQA/images/6519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the specific humidity level recorded in the circled region?\nChoices:\n(A) 9 grams of water vapor per kilogram of air\n(B) 10 grams of water vapor per kilogram of air\n(C) 2 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.953125, "loss": 4.03168249130249 }, { "id": "identity_29998", "image": "PMC-VQA/images/PMC3038968_F1_87161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the abdominal CT reveal about the tumor?\nChoices:\n(A) It is located in the upper abdomen \n(B) It is a small, even tumor \n(C) It is large and uneven \n(D) It is located in the back " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.953125, "loss": 4.039518356323242 }, { "id": "identity_246326", "image": "TabMWP/images/6329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each plate has 5 biscuits, how many biscuits are on 10 plates, and how many more biscuits are there on 10 plates compared to 6 plates?" }, { "from": "gpt", "value": "The answer is 50 biscuits on 10 plates and 20 more biscuits" } ], "clip_score": 36.59375, "loss": 1.1050597429275513 }, { "id": "identity_333897", "image": "PMC-VQA/images/PMC7393207_F2_222998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of imaging used in this study?\nChoices:\n(A) Computed Tomography\n(B) Magnetic Resonance Imaging\n(C) X-Ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.265625, "loss": 4.3995256423950195 }, { "id": "identity_218645", "image": "Super-CLEVR/images/superCLEVR_new_000824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object that is not a vehicle?" }, { "from": "gpt", "value": "The answer is No, all objects are vehicles" } ], "clip_score": 24.25, "loss": 2.3463692665100098 }, { "id": "identity_282389", "image": "A-OKVQA/images/166474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brand is the manufacturer of the white shoes?\nChoices:\n(A) Under Armour\n(B) Nike\n(C) Adidas\n(D) New Balance" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.0703125, "loss": 3.9309661388397217 }, { "id": "identity_108076", "image": "PMC-VQA/images/PMC7898273_rcr2710-fig-0002_372020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign indicated by the red arrows in the image?" }, { "from": "gpt", "value": "The answer is Ground-glass opacity" } ], "clip_score": 28.140625, "loss": 3.8682076930999756 }, { "id": "identity_91311", "image": "GeoQA+/images/5526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点O是△ABC外接圆的圆心,连接OA,若∠1=37°,则∠BOA的度数是多少?" }, { "from": "gpt", "value": "The answer is greater than 74°" } ], "clip_score": 32.0625, "loss": 2.9723422527313232 }, { "id": "identity_203052", "image": "PMC-VQA/images/PMC8087373_gf03_431597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is shown in the image?" }, { "from": "gpt", "value": "The answer is Stratified squamous epithelium" } ], "clip_score": 32.9375, "loss": 2.0835325717926025 }, { "id": "identity_239867", "image": "DocVQA/images/nlcd0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document on page 5?" }, { "from": "gpt", "value": "The answer is OFFICIAL FOOD AND DRUG ADMINISTRATION TOLERANCES" } ], "clip_score": 35.75, "loss": 0.984763503074646 }, { "id": "identity_73447", "image": "TQA/images/abc_question_images/parts_telescope_17291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the part labeled 'M' in the diagram?" }, { "from": "gpt", "value": "The answer is It moves the stage up and down for focusing" } ], "clip_score": 25.984375, "loss": 3.057326555252075 }, { "id": "identity_244301", "image": "IconQA/images/72968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Jack starts writing his letter at 6:00 P.M. and finishes it 2 hours and 45 minutes later, what time does he finish writing the letter?" }, { "from": "gpt", "value": "The answer is 8:45 P.M" } ], "clip_score": 26.4375, "loss": 1.151664137840271 }, { "id": "identity_265239", "image": "DVQA/images/bar_train_00156143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the level of accuracy of the algorithm theme within the dataset pay?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.65625, "loss": 3.334599256515503 }, { "id": "identity_87046", "image": "GeoQA+/images/2171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠CAB = 40° in the inscribed triangle ABC of circle O, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 37.625, "loss": 1.8119491338729858 }, { "id": "identity_146093", "image": "MapQA/images/map_6201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have bids in the range of 69850-71465?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.90625, "loss": 2.4511830806732178 }, { "id": "identity_128130", "image": "VQA-AS/images/19989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of furniture are in the room?" }, { "from": "gpt", "value": "The answer is There are two pieces of furniture" } ], "clip_score": 25.859375, "loss": 2.6267175674438477 }, { "id": "identity_47820", "image": "FigureQA/images/20844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of the Peru color in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 16.67%" } ], "clip_score": 34.75, "loss": 1.6688288450241089 }, { "id": "identity_312617", "image": "TextVQA/images/13d294b93d12e9ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dosage of the medication in the blue box?" }, { "from": "gpt", "value": "The answer is 500" } ], "clip_score": 27.359375, "loss": 2.6378817558288574 }, { "id": "identity_3120", "image": "ScienceQA/images/8274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Hawaii?\nChoices:\n(A) Olympia\n(B) Portland\n(C) Santa Fe\n(D) Honolulu" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.515625, "loss": 3.9701526165008545 }, { "id": "identity_191574", "image": "PMC-VQA/images/PMC7567400_f2_276956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the brain shows more highlighted areas in the scan?" }, { "from": "gpt", "value": "The answer is Right" } ], "clip_score": 33.75, "loss": 3.65983510017395 }, { "id": "identity_228992", "image": "Geometry3K/images/1662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In \\(\\odot R\\), given \\(SU = 20\\), \\(YW = 20\\), and \\(m \\widehat{YX} = 45^\\circ\\), find the length of \\(ST\\)" }, { "from": "gpt", "value": "The answer is \\(10\\sqrt{2}\\)" } ], "clip_score": 21.28125, "loss": 1.6984671354293823 }, { "id": "identity_316289", "image": "Geometry3K/images/1230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x given that AD is an altitude of the triangle.\n\nChoices:\n(A) 12\n(B) 18\n(C) 22\n(D) 24" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.09375, "loss": 2.1412513256073 }, { "id": "identity_175182", "image": "IconQA/images/33891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the height of the tallest rectangle to the height of the shortest rectangle?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 23.546875, "loss": 1.4167040586471558 }, { "id": "identity_336474", "image": "IconQA/images/41404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use paper clips to measure this. This is about (_) paper clips long." }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.078125, "loss": 2.586158037185669 }, { "id": "identity_91241", "image": "Geometry3K/images/1480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of w in the parallelogram M N P R?" }, { "from": "gpt", "value": "The answer is 52.25" } ], "clip_score": 34.375, "loss": 2.0991880893707275 }, { "id": "identity_303502", "image": "TextVQA/images/bbe7d74261f667a5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contact number?" }, { "from": "gpt", "value": "The answer is 5142528037" } ], "clip_score": 14.7109375, "loss": 1.9950002431869507 }, { "id": "identity_125839", "image": "A-OKVQA/images/447574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of soup is in the bowl?" }, { "from": "gpt", "value": "The answer is Potato leek soup" } ], "clip_score": 15.6953125, "loss": 2.96368408203125 }, { "id": "identity_281808", "image": "ScienceQA/images/12582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sample has a higher temperature based on the comparison of the average kinetic energies of the particles in each sample?\n(A) sample A\n(B) sample B\n(C) neither; the samples have the same temperature" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.4375, "loss": 2.8064513206481934 }, { "id": "identity_268620", "image": "UniGeo/images/1137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, quadrilateral ABCD is circumscribed around circle O. If ∠BOD = 138°, what is the measure of its exterior angle ∠DCE?\nChoices:\n(A) 69°\n(B) 42°\n(C) 48°\n(D) 38°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.5, "loss": 2.155958890914917 }, { "id": "identity_98426", "image": "IconQA/images/82010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fish have a combination of yellow and red colors?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.90625, "loss": 2.318136692047119 }, { "id": "identity_46755", "image": "FigureQA/images/98706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Purple line ever intersect with the Gray line?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.703125, "loss": 2.9368114471435547 }, { "id": "identity_309689", "image": "TQA/images/abc_question_images/human_system_eye_16134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which one is the cornea?\nChoices:\n(A) R\n(B) L\n(C) W\n(D) J" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.59375, "loss": 4.3268609046936035 }, { "id": "identity_177529", "image": "TabMWP/images/15061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the least preferred among the respondents?" }, { "from": "gpt", "value": "The answer is Checking personal e-mail" } ], "clip_score": 30.9375, "loss": 1.9083601236343384 }, { "id": "identity_232985", "image": "IconQA/images/17442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertices are there in total for all the shapes combined?" }, { "from": "gpt", "value": "The answer is 36" } ], "clip_score": 27.6875, "loss": 1.8708242177963257 }, { "id": "identity_140170", "image": "DocVQA/images/rjxn0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the amount billed on 11/10/15?" }, { "from": "gpt", "value": "The answer is $5,000" } ], "clip_score": 26.90625, "loss": 2.156926155090332 }, { "id": "identity_66114", "image": "ScienceQA/images/10430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the initial temperature was 10°C instead of 4°C, what would be the new final temperature for the greenhouse without solar panels?" }, { "from": "gpt", "value": "The answer is 28°C" } ], "clip_score": 28.15625, "loss": 2.069999933242798 }, { "id": "identity_176516", "image": "IconQA/images/13798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trucks have their front facing the left side of the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 37.28125, "loss": 2.315492868423462 }, { "id": "identity_40838", "image": "DocVQA/images/fnpy0000_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the delivery date for this order?" }, { "from": "gpt", "value": "The answer is 06/30/97" } ], "clip_score": 22.90625, "loss": 1.8588403463363647 }, { "id": "identity_121357", "image": "Super-CLEVR/images/superCLEVR_new_000849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the green motorbike have?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.4375, "loss": 1.9365276098251343 }, { "id": "identity_101477", "image": "TabMWP/images/28425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Ernest need to buy 3 black jackets, 2 blue sweaters, and 4 denim jackets?" }, { "from": "gpt", "value": "The answer is $174.50" } ], "clip_score": 37.6875, "loss": 1.9579854011535645 }, { "id": "identity_37318", "image": "TQA/images/question_images/tides_136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phase of the moon is happening when there are neap tides?\nChoices:\n(A) new moon\n(B) first quarter\n(C) full moon\n(D) third quarter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.5625, "loss": 3.485644578933716 }, { "id": "identity_93211", "image": "IconQA/images/53811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove all the black marbles, how many marbles will be left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.828125, "loss": 2.4465060234069824 }, { "id": "identity_268887", "image": "UniGeo/images/2553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, two straight roads l1 and l2 intersect at point O. Villagers from village C are building three factories A, B, and D along the roads, with AB=BC=CD=DA=5 kilometers. If the distance from village C to road l1 is 4 kilometers, what is the distance from village C to road l2?\nChoices:\n(A) 6 kilometers\n(B) 5 kilometers\n(C) 4 kilometers\n(D) 3 kilometers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.015625, "loss": 2.5874972343444824 }, { "id": "identity_89383", "image": "UniGeo/images/77.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle CDE in the given diagram?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 31.03125, "loss": 1.652362585067749 }, { "id": "identity_89153", "image": "UniGeo/images/3153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the right triangle ABC, if AB = 5 and AC = 4, what is the area of the triangle?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.0625, "loss": 2.804180145263672 }, { "id": "identity_67452", "image": "AI2D/images/1014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"Stigma\"?" }, { "from": "gpt", "value": "The answer is Traps pollen" } ], "clip_score": 27.328125, "loss": 2.6532094478607178 }, { "id": "identity_279110", "image": "FigureQA/images/56111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Royal Blue the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.765625, "loss": 4.47091817855835 }, { "id": "identity_200488", "image": "PMC-VQA/images/PMC5033377_pone.0162969.g002_76274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in figure D'?" }, { "from": "gpt", "value": "The answer is Likely dorsal-ventral orientation, but not explicitly labeled" } ], "clip_score": 20.640625, "loss": 3.10329270362854 }, { "id": "identity_76795", "image": "AI2D/images/3493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled \"Golgi apparatus\"?" }, { "from": "gpt", "value": "The answer is Golgi apparatus" } ], "clip_score": 25.265625, "loss": 1.7740514278411865 }, { "id": "identity_19561", "image": "AI2D/images/1220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost layer called?\nChoices:\n(A) cell wall\n(B) vacuole\n(C) none of the above\n(D) golgi body" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.71875, "loss": 4.125794887542725 }, { "id": "identity_262149", "image": "DVQA/images/bar_train_00198554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the income value represented by the curve?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 20.03125, "loss": 2.8561460971832275 }, { "id": "identity_67128", "image": "ScienceQA/images/12474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which producer is a food source for the most number of consumers in this ecosystem?" }, { "from": "gpt", "value": "The answer is Bear sedge" } ], "clip_score": 24.46875, "loss": 3.029731035232544 }, { "id": "identity_317694", "image": "GeoQA+/images/3815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图是两个全等三角形,图中的字母表示三角形的边长,则∠1的度数是多少?\nChoices:\n(A) 62°\n(B) 72°\n(C) 76°\n(D) 66°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.9375, "loss": 3.1093966960906982 }, { "id": "identity_24440", "image": "ChartQA/images/multi_col_1471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the most popular product in 2019?" }, { "from": "gpt", "value": "The answer is Television product" } ], "clip_score": 27.53125, "loss": 2.6347601413726807 }, { "id": "identity_102309", "image": "CLEVR-Math/images/CLEVR_train_003555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a cylindrical shape?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.0, "loss": 2.5803956985473633 }, { "id": "identity_75654", "image": "ScienceQA/images/18526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in this food web is a decomposer?" }, { "from": "gpt", "value": "The answer is Mushroom" } ], "clip_score": 29.984375, "loss": 2.290468454360962 }, { "id": "identity_281207", "image": "IconQA/images/41204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) rotate\n(B) shift\n(C) invert" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 4.375509262084961 }, { "id": "identity_46637", "image": "FigureQA/images/62924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Dark Green at the 50 mark on the x-axis?" }, { "from": "gpt", "value": "The answer is 92" } ], "clip_score": 26.53125, "loss": 3.1061787605285645 }, { "id": "identity_160588", "image": "GeoQA+/images/2373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the obtuse angle formed by line b and the transversal?" }, { "from": "gpt", "value": "The answer is 110 degrees" } ], "clip_score": 32.28125, "loss": 1.8525662422180176 }, { "id": "identity_141264", "image": "DocVQA/images/mpcl0037_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proposal number mentioned in the document?" }, { "from": "gpt", "value": "The answer is R9044" } ], "clip_score": 23.78125, "loss": 3.8129518032073975 }, { "id": "identity_240827", "image": "DVQA/images/bar_train_00117218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'harm' algorithm across all datasets?" }, { "from": "gpt", "value": "The answer is 3.67" } ], "clip_score": 29.875, "loss": 1.8536205291748047 }, { "id": "identity_222060", "image": "PMC-VQA/images/PMC7993516_Fig1_399517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lung fields clear or is there any sign of lung pathology?" }, { "from": "gpt", "value": "The answer is Lung fields are clear" } ], "clip_score": 24.34375, "loss": 2.268366575241089 }, { "id": "identity_251482", "image": "Geometry3K/images/1555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the perimeter of triangle WXY, if Z X = 20, T Y = 15, and W T = 5" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 35.625, "loss": 2.149944543838501 }, { "id": "identity_334939", "image": "VQA2.0/images/406710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the individuals shown of a specific descent?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 9.2109375, "loss": 3.182961940765381 }, { "id": "identity_24712", "image": "TextVQA/images/572e6271daf518c0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what device is the villain holding?" }, { "from": "gpt", "value": "The answer is curative ray" } ], "clip_score": 23.0625, "loss": 3.3649697303771973 }, { "id": "identity_60339", "image": "DVQA/images/bar_train_00082729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value in the 'crime' group?" }, { "from": "gpt", "value": "The answer is pastor" } ], "clip_score": 31.671875, "loss": 3.1497974395751953 }, { "id": "identity_125144", "image": "VQA2.0/images/434894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the wine bottle?" }, { "from": "gpt", "value": "The answer is Tasca" } ], "clip_score": 9.53125, "loss": 4.890564441680908 }, { "id": "identity_84458", "image": "GeoQA+/images/5134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the water's maximum depth is 8cm, what is the height of the empty part of the cylinder if the cylinder's height is 20cm?" }, { "from": "gpt", "value": "The answer is 12cm" } ], "clip_score": 31.03125, "loss": 1.6212058067321777 }, { "id": "identity_271897", "image": "FigureQA/images/18669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Khaki or Steel Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.125, "loss": 4.417123794555664 }, { "id": "identity_130452", "image": "TextVQA/images/38b41a05d0830f97.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you find a book with a picture of a building on its cover?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.265625, "loss": 2.4954352378845215 }, { "id": "identity_100178", "image": "IconQA/images/101749.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total time difference between when Chase and Ed got the mail?" }, { "from": "gpt", "value": "The answer is 1 hour" } ], "clip_score": 23.4375, "loss": 2.2897465229034424 }, { "id": "identity_126579", "image": "VQA-RAD/images/synpic31256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any rib fractures in the image?" }, { "from": "gpt", "value": "The answer is No rib fractures identified" } ], "clip_score": 28.125, "loss": 2.9114537239074707 }, { "id": "identity_112898", "image": "TextVQA/images/8774ea9366a4c286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many \"ONE WAY\" signs are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.390625, "loss": 2.1837072372436523 }, { "id": "identity_151254", "image": "AI2D/images/4562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the plant is the first to emerge from the seed during germination?" }, { "from": "gpt", "value": "The answer is Radicle" } ], "clip_score": 30.46875, "loss": 2.669532537460327 }, { "id": "identity_275052", "image": "UniGeo/images/1247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, point P is outside circle O. PA and PB are tangents to circle O at points A and B respectively. AB and OP are connected, and OP intersects circle O at point D. If ∠APB = 56°, what is the measure of ∠ABD?\nChoices:\n(A) 28°\n(B) 30°\n(C) 31°\n(D) 34°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.46875, "loss": 2.4315178394317627 }, { "id": "identity_58520", "image": "DVQA/images/bar_train_00071482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a higher preference in the 'sport' category than in the 'angle' category?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.53125, "loss": 2.4080991744995117 }, { "id": "identity_139829", "image": "ChartQA/images/8742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage points between the favorable responses of the U.S. and Canada?" }, { "from": "gpt", "value": "The answer is 3%" } ], "clip_score": 20.453125, "loss": 2.6155946254730225 }, { "id": "identity_267405", "image": "TabMWP/images/10915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Alexa requires to purchase a grilled steak and spaghetti with meatballs?" }, { "from": "gpt", "value": "The answer is 17.12" } ], "clip_score": 26.765625, "loss": 2.153806686401367 }, { "id": "identity_250412", "image": "IconQA/images/79009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sequence of transformations (slide, turn, flip) can be applied to the letter 'R' to achieve the final position shown in the image?" }, { "from": "gpt", "value": "The answer is Slide" } ], "clip_score": 28.03125, "loss": 3.4927425384521484 }, { "id": "identity_54907", "image": "FigureQA/images/4514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Pale Green portion represent more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 39.28125, "loss": 3.2284016609191895 }, { "id": "identity_294440", "image": "VQA2.0/images/205243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sails that are currently open on the sailboats?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 21.84375, "loss": 1.4866212606430054 }, { "id": "identity_182031", "image": "VQA2.0/images/311208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a child present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.4765625, "loss": 3.2644317150115967 }, { "id": "identity_252049", "image": "ScienceQA/images/13711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the reptile and describe three distinct physical characteristics that differentiate it from an amphibian" }, { "from": "gpt", "value": "The answer is Mojave rattlesnake, scaly skin, no limbs, terrestrial habitat" } ], "clip_score": 23.703125, "loss": 3.212123394012451 }, { "id": "identity_335729", "image": "VQA2.0/images/528931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these animals are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.828125, "loss": 2.169785737991333 }, { "id": "identity_71262", "image": "AI2D/images/2008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the ants in this food chain?" }, { "from": "gpt", "value": "The answer is Primary consumers" } ], "clip_score": 26.8125, "loss": 4.228022575378418 }, { "id": "identity_19575", "image": "FigureQA/images/43981.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cyan the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5625, "loss": 4.732788562774658 }, { "id": "identity_84718", "image": "UniGeo/images/105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle EFG in the given diagram?" }, { "from": "gpt", "value": "The answer is 80 degrees" } ], "clip_score": 33.71875, "loss": 2.162200450897217 }, { "id": "identity_240247", "image": "GeoQA+/images/1662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线a∥b,∠1=85°,∠2=35°,若∠4是∠3的补角,求∠4的度数。" }, { "from": "gpt", "value": "The answer is 120°" } ], "clip_score": 33.96875, "loss": 1.7141715288162231 }, { "id": "identity_254165", "image": "PMC-VQA/images/PMC6572828_Fig2_495060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the angiogram findings, what is the likely diagnosis and what are the potential treatment options?" }, { "from": "gpt", "value": "The answer is Diagnosis depends on the condition; treatments vary accordingly" } ], "clip_score": 22.46875, "loss": 3.3969035148620605 }, { "id": "identity_204823", "image": "VQA2.0/images/562575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.4375, "loss": 3.568526029586792 }, { "id": "identity_211313", "image": "PMC-VQA/images/PMC6947870_Fig1_96263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any developmental dysplasia of the hip be seen?" }, { "from": "gpt", "value": "The answer is No developmental dysplasia of the hip is detected" } ], "clip_score": 33.125, "loss": 1.934163212776184 }, { "id": "identity_83844", "image": "UniGeo/images/2777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that points A, B, C, and D are on circle O, angle ABC is 90 degrees, AD is 12, and CD is 5, find the length of segment AC" }, { "from": "gpt", "value": "The answer is sqrt(119)" } ], "clip_score": 33.75, "loss": 2.064300775527954 }, { "id": "identity_296535", "image": "VQA2.0/images/237886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum number of people that can wash their hands simultaneously in this place?" }, { "from": "gpt", "value": "The answer is many" } ], "clip_score": 22.5, "loss": 4.458502769470215 }, { "id": "identity_158331", "image": "TQA/images/question_images/human_system_excretory_6106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"ureter\"?" }, { "from": "gpt", "value": "The answer is Carries urine from kidney to bladder" } ], "clip_score": 38.0, "loss": 1.60722017288208 }, { "id": "identity_202080", "image": "PMC-VQA/images/PMC5893487_Fig21_299403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 30.59375, "loss": 2.256201982498169 }, { "id": "identity_145784", "image": "DocVQA/images/hpbl0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Vice Chancellor-Academic Affairs" } ], "clip_score": 20.390625, "loss": 2.0184412002563477 }, { "id": "identity_186553", "image": "TextVQA/images/003d25460307a847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the shirt worn by the person in the center?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.1171875, "loss": 3.7401649951934814 }, { "id": "identity_219606", "image": "A-OKVQA/images/68594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing is the person in the image wearing?" }, { "from": "gpt", "value": "The answer is A striped shirt" } ], "clip_score": 16.28125, "loss": 2.9024007320404053 }, { "id": "identity_202913", "image": "PMC-VQA/images/PMC6206386_Fig4_387569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific part of the brain is indicated by the arrow in the image?" }, { "from": "gpt", "value": "The answer is Basal ganglia" } ], "clip_score": 32.4375, "loss": 2.9451675415039062 }, { "id": "identity_196632", "image": "A-OKVQA/images/334760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the ground?" }, { "from": "gpt", "value": "The answer is Sandy or dirt ground" } ], "clip_score": 15.9453125, "loss": 2.6081767082214355 }, { "id": "identity_109952", "image": "VQA2.0/images/304091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the horse's mane?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 17.109375, "loss": 3.5681490898132324 }, { "id": "identity_23495", "image": "TextVQA/images/cd11c711755c9e7c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the number on the jersey?" }, { "from": "gpt", "value": "The answer is 21" } ], "clip_score": 19.046875, "loss": 2.050931930541992 }, { "id": "identity_165412", "image": "Geometry3K/images/90.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If m \\angle 14 = 130, find the measure of \\angle 6" }, { "from": "gpt", "value": "The answer is 130" } ], "clip_score": 27.671875, "loss": 1.7591159343719482 }, { "id": "identity_336231", "image": "PMC-VQA/images/PMC4418099_Fig1_383036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the hematoma located in this patient's brain?\nChoices:\n(A) Right frontal scalp.\n(B) Left frontal scalp.\n(C) Occipital scalp.\n(D) Temporal scalp." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.390625, "loss": 4.001284599304199 }, { "id": "identity_282202", "image": "VQA-AS/images/3861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object that the young boy is holding tightly?" }, { "from": "gpt", "value": "The answer is book" } ], "clip_score": 22.5625, "loss": 3.631301164627075 }, { "id": "identity_33353", "image": "PMC-VQA/images/PMC5984782_Fig3_323393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the difference between images e and f? \nChoices:\n(A) They are the same \n(B) Image e was taken from live CAM and f from fixed and sectioned CAM \n(C) Image e was taken from fixed CAM and f from live CAM \n(D) Image e was taken with a different microscope than image f. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.65625, "loss": 3.8198351860046387 }, { "id": "identity_295934", "image": "DocVQA/images/xfhb0228_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which title appears first in the document?" }, { "from": "gpt", "value": "The answer is clerical section" } ], "clip_score": 22.109375, "loss": 3.448333978652954 }, { "id": "identity_223545", "image": "Super-CLEVR/images/superCLEVR_new_001358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the large red bicycle?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 32.625, "loss": 4.411762714385986 }, { "id": "identity_87239", "image": "UniGeo/images/409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠D=32° and BC is the diameter, what is the measure of ∠DOB?" }, { "from": "gpt", "value": "The answer is 116°" } ], "clip_score": 33.8125, "loss": 1.8295451402664185 }, { "id": "identity_246878", "image": "A-OKVQA/images/292697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual images are used to create this collage, and what is the ratio of images showing players in action to the total number of images?" }, { "from": "gpt", "value": "The answer is 9/13" } ], "clip_score": 28.6875, "loss": 3.1870415210723877 }, { "id": "identity_103271", "image": "CLEVR-Math/images/CLEVR_train_065762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither blue nor green?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.125, "loss": 2.547240972518921 }, { "id": "identity_265083", "image": "TabMWP/images/7526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the information provided in the table, does this relation qualify as a function?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 3.367224931716919 }, { "id": "identity_296897", "image": "DocVQA/images/pgxg0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the timing for Dallas/Fort Worth (DTC)?" }, { "from": "gpt", "value": "The answer is April 2000" } ], "clip_score": 20.578125, "loss": 2.0307695865631104 }, { "id": "identity_12430", "image": "A-OKVQA/images/257830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trees are visible here?\nChoices:\n(A) palms\n(B) deciduous\n(C) olives\n(D) conifers" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 6.72265625, "loss": 3.968672513961792 }, { "id": "identity_271535", "image": "DocVQA/images/xjhk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total amount spent on taxis?" }, { "from": "gpt", "value": "The answer is 2.50" } ], "clip_score": 23.578125, "loss": 2.088343381881714 }, { "id": "identity_45381", "image": "DVQA/images/bar_train_00189164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the preference for 'break' equal in both 'pipe' and 'leader' categories?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.8125, "loss": 3.282241106033325 }, { "id": "identity_248710", "image": "VizWiz/images/VizWiz_train_00002105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the current temperature and the temperature if it increases by 5 degrees?" }, { "from": "gpt", "value": "The answer is 5 degrees" } ], "clip_score": 25.578125, "loss": 2.3631489276885986 }, { "id": "identity_306675", "image": "TextVQA/images/5f5aef36f114d33b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: This is the seal of which place?" }, { "from": "gpt", "value": "The answer is united states" } ], "clip_score": 28.390625, "loss": 2.3186190128326416 }, { "id": "identity_66832", "image": "ScienceQA/images/3537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the plant is directly below the petals in the given image?" }, { "from": "gpt", "value": "The answer is Sepal" } ], "clip_score": 30.46875, "loss": 4.023300647735596 }, { "id": "identity_31406", "image": "PMC-VQA/images/PMC2562460_F6_28479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of muscles are present in the anterior part? \nChoices:\n(A) transverse muscles \n(B) diagonal muscles \n(C) circular muscles \n(D) longitudinal muscles " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3046875, "loss": 4.374789237976074 }, { "id": "identity_284251", "image": "DVQA/images/bar_train_00174079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars in each group?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 20.203125, "loss": 3.25508451461792 }, { "id": "identity_101194", "image": "IconQA/images/5404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove one block from the top of each column, how many blocks will remain?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 31.734375, "loss": 2.2560842037200928 }, { "id": "identity_232922", "image": "VizWiz/images/VizWiz_train_00016446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the room indicated by this sign, and what are the visual elements used to convey this purpose?" }, { "from": "gpt", "value": "The answer is Women's restroom, indicated by text and female figure symbol" } ], "clip_score": 31.921875, "loss": 3.3110475540161133 }, { "id": "identity_238900", "image": "ScienceQA/images/20805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and then name its capital. Additionally, list the capitals of the states directly bordering it" }, { "from": "gpt", "value": "The answer is Topeka, Lincoln, Jefferson City, Oklahoma City, Denver" } ], "clip_score": 32.65625, "loss": 2.320587396621704 }, { "id": "identity_142827", "image": "DVQA/images/bar_train_00150665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the \"count\" and \"volume\" categories in the \"horror\" group?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.15625, "loss": 2.738478899002075 }, { "id": "identity_203658", "image": "PMC-VQA/images/PMC2812769_F0001_55611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a fracture in the area indicated by the white arrow?" }, { "from": "gpt", "value": "The answer is No evidence of fracture" } ], "clip_score": 27.078125, "loss": 2.955441474914551 }, { "id": "identity_125122", "image": "PMC-VQA/images/PMC4269278_fig1_345313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general shape of the abnormalities observed in both breasts?" }, { "from": "gpt", "value": "The answer is Irregular and non-uniform" } ], "clip_score": 22.1875, "loss": 4.037035942077637 }, { "id": "identity_331943", "image": "IconQA/images/34525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these shapes are there?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 27.34375, "loss": 2.0461347103118896 }, { "id": "identity_19671", "image": "A-OKVQA/images/402041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who is the trainer?\nChoices:\n(A) first woman\n(B) man\n(C) second woman\n(D) third woman" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.2421875, "loss": 4.128290176391602 }, { "id": "identity_316187", "image": "UniGeo/images/2486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,将一个角为60°的直角三角板的顶点放在圆心O上,斜边和一直角边分别与圆O相交于A、B两点,P是弧AB上任意一点(与A、B不重合),则∠APB=()\n\n选择:\n(A) 15°\n(B) 30°\n(C) 45°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.359375, "loss": 2.661168336868286 }, { "id": "identity_257405", "image": "GeoQA+/images/5801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC内接于⊙O,AD为⊙O的直径,交BC于点E,若DE=2,OE=3,求△ADE的面积。" }, { "from": "gpt", "value": "The answer is \\( 2\\sqrt{13} \\)" } ], "clip_score": 30.828125, "loss": 2.4277379512786865 }, { "id": "identity_70677", "image": "ScienceQA/images/3624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely purpose of the liquid in the container?" }, { "from": "gpt", "value": "The answer is Culturing microorganisms" } ], "clip_score": 28.28125, "loss": 3.863565683364868 }, { "id": "identity_109027", "image": "VQA2.0/images/116650.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.578125, "loss": 2.3159873485565186 }, { "id": "identity_143130", "image": "DocVQA/images/ntlg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the account number associated with Frances Ellen Brock?" }, { "from": "gpt", "value": "The answer is 22 3235 11" } ], "clip_score": 22.46875, "loss": 2.7307090759277344 }, { "id": "identity_281698", "image": "PMC-VQA/images/PMC5823685_mgg3351-fig-0001_279467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What area of the brain is impacted by bilateral polymicrogyria?\nChoices:\n(A) Cerebellum \n(B) Occipital lobe \n(C) Parietal lobe \n(D) Temporal lobe" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.265625, "loss": 3.734391450881958 }, { "id": "identity_146670", "image": "DocVQA/images/sllk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage reduction in dental caries increment estimated?" }, { "from": "gpt", "value": "The answer is about 25 percent" } ], "clip_score": 16.328125, "loss": 2.55869197845459 }, { "id": "identity_330928", "image": "ScienceQA/images/6816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these objects attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.984375, "loss": 3.5655016899108887 }, { "id": "identity_16166", "image": "DocVQA/images/qghv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the president and Chief Executive Officer of Scripps Clinic & Research Foundation?" }, { "from": "gpt", "value": "The answer is Charles C. Edwards, M.D." } ], "clip_score": 18.078125, "loss": 1.9386833906173706 }, { "id": "identity_190202", "image": "A-OKVQA/images/233744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the ground?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 13.265625, "loss": 2.130307674407959 }, { "id": "identity_151665", "image": "ScienceQA/images/3590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which substance would be easiest to pour quickly?" }, { "from": "gpt", "value": "The answer is Ocean water" } ], "clip_score": 28.546875, "loss": 2.8245391845703125 }, { "id": "identity_225531", "image": "DocVQA/images/txdw0217_44.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the table on page 44?" }, { "from": "gpt", "value": "The answer is Effects of Various Treatments on Vasomotor Symptoms" } ], "clip_score": 19.859375, "loss": 2.5625996589660645 }, { "id": "identity_99670", "image": "CLEVR-Math/images/CLEVR_train_021069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the purple sphere but smaller than the yellow sphere?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.640625, "loss": 2.7142772674560547 }, { "id": "identity_328518", "image": "DocVQA/images/tncw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which paper will Rosie focus on?" }, { "from": "gpt", "value": "The answer is Trudy Bush breast cancer paper" } ], "clip_score": 20.28125, "loss": 4.0564961433410645 }, { "id": "identity_199429", "image": "VQA2.0/images/237608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clock is on the wall?" }, { "from": "gpt", "value": "The answer is Analog wall clock" } ], "clip_score": 26.421875, "loss": 3.0193569660186768 }, { "id": "identity_57528", "image": "ChartQA/images/12041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference in favorable opinions of Nawaz Sharif between 2010 and 2014?" }, { "from": "gpt", "value": "The answer is 7%" } ], "clip_score": 25.734375, "loss": 2.1650192737579346 }, { "id": "identity_283258", "image": "FigureQA/images/45875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cadet Blue the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.640625, "loss": 3.8742880821228027 }, { "id": "identity_44959", "image": "FigureQA/images/10354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of Dark Orange in the pie chart?" }, { "from": "gpt", "value": "The answer is 0.56" } ], "clip_score": 38.15625, "loss": 2.0923032760620117 }, { "id": "identity_293387", "image": "PMC-VQA/images/PMC5648941_F1_229187.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of fracture displayed in the radiograph:\n(A) Colles fracture\n(B) Intertrochanteric fracture\n(C) Femoral neck fracture\n(D) Greenstick fracture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.78125, "loss": 4.627121925354004 }, { "id": "identity_41410", "image": "ChartQA/images/multi_col_100786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did the 0-14 years age group have the highest share in total population?" }, { "from": "gpt", "value": "The answer is 2009" } ], "clip_score": 29.5, "loss": 1.721697449684143 }, { "id": "identity_274587", "image": "VizWiz/images/VizWiz_train_00017780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is orange juice" } ], "clip_score": 29.09375, "loss": 2.5183207988739014 }, { "id": "identity_152855", "image": "ScienceQA/images/2860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the south pole of the magnet in Pair 1?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 31.84375, "loss": 3.4163029193878174 }, { "id": "identity_301751", "image": "UniGeo/images/2757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图: P为平行四边形ABCD外一点, 连接PA, PB, PC, PD, BD, 若S~△PAB~=1, S~△PBC~=7, 则S~△PBD~的面积为多少?\n选择:\n(A) 4\n(B) 5\n(C) 6\n(D) 7" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.96875, "loss": 2.38915753364563 }, { "id": "identity_321954", "image": "A-OKVQA/images/291140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is it necessary to often fix beach houses?\nChoices:\n(A) law\n(B) nosy neighbors\n(C) environmental wear\n(D) beauty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.90625, "loss": 3.8154914379119873 }, { "id": "identity_108448", "image": "VQA2.0/images/376436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man in the orange shirt doing?" }, { "from": "gpt", "value": "The answer is Walking" } ], "clip_score": 16.359375, "loss": 3.1319284439086914 }, { "id": "identity_105058", "image": "VQA2.0/images/486869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the kites flying at the same height?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.0, "loss": 2.9213690757751465 }, { "id": "identity_197880", "image": "A-OKVQA/images/176571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather condition is likely occurring based on the people's use of umbrellas?" }, { "from": "gpt", "value": "The answer is Rain" } ], "clip_score": 23.09375, "loss": 3.297576427459717 }, { "id": "identity_304298", "image": "PMC-VQA/images/PMC7772687_f0005_332266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What becomes difficult to distinguish with lower image resolution on the ex vivo trapezium image shown?\nChoices:\n(A) Cortisol\n(B) Pores near the cortex\n(C) Arthritis\n(D) Muscle fibers" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.890625, "loss": 4.122092247009277 }, { "id": "identity_158205", "image": "ScienceQA/images/12187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What body of water is located to the east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 27.59375, "loss": 2.4586455821990967 }, { "id": "identity_336942", "image": "IconQA/images/10691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many of these are there?\nChoices:\n(A) 10\n(B) 6\n(C) 8\n(D) 3\n(E) 1" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.59375, "loss": 3.124608278274536 }, { "id": "identity_151764", "image": "ScienceQA/images/15557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common item that both Bridgette and Anthony want?" }, { "from": "gpt", "value": "The answer is Water" } ], "clip_score": 30.84375, "loss": 3.216247081756592 }, { "id": "identity_299521", "image": "VQA2.0/images/501540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of trees present?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 14.6171875, "loss": 2.2387962341308594 }, { "id": "identity_134117", "image": "PlotQA/images/111286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which year has the highest number of out-of-school male children?" }, { "from": "gpt", "value": "The answer is 2003" } ], "clip_score": 29.046875, "loss": 1.8068408966064453 }, { "id": "identity_168699", "image": "IconQA/images/29000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does this shape have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 27.0, "loss": 2.021796226501465 }, { "id": "identity_55445", "image": "DVQA/images/bar_train_00090587.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more 'figure' values than 'kid' values in the 'salt' group?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.5625, "loss": 3.031419515609741 }, { "id": "identity_175564", "image": "IconQA/images/40051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Adam took a 30-minute break between each activity, what time did he finish all activities?" }, { "from": "gpt", "value": "The answer is 9:00 PM" } ], "clip_score": 28.8125, "loss": 1.7417405843734741 }, { "id": "identity_248496", "image": "TabMWP/images/35364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Mitch need to buy 3 boa constrictors, 2 tortoises, and 1 macaw?" }, { "from": "gpt", "value": "The answer is $2,564" } ], "clip_score": 30.125, "loss": 2.0743675231933594 }, { "id": "identity_293764", "image": "IconQA/images/39160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you randomly choose a marble without seeing it, which color are you less likely to select?\nChoices:\n(A) green\n(B) black" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.09375, "loss": 4.334734916687012 }, { "id": "identity_158132", "image": "AI2D/images/1300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What stage comes immediately after the seedling develops leaves?" }, { "from": "gpt", "value": "The answer is Stage 4 Young Plant" } ], "clip_score": 34.46875, "loss": 2.211815118789673 }, { "id": "identity_255936", "image": "ChartQA/images/1714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of responses for 'nursing homes in your area' across all confidence levels?" }, { "from": "gpt", "value": "The answer is 99%" } ], "clip_score": 25.0, "loss": 2.657273054122925 }, { "id": "identity_198122", "image": "A-OKVQA/images/5256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheeled vehicles can be seen in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.375, "loss": 4.270214557647705 }, { "id": "identity_279749", "image": "UniGeo/images/1282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AD is a tangent to circle O at point A, AC is the diameter of circle O, and CD intersects circle O at point B. If the measure of angle ABD is 70°, what is the measure of angle D?\nChoices:\n(A) 70°\n(B) 60°\n(C) 55°\n(D) 35°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.3125, "loss": 2.453369617462158 }, { "id": "identity_293307", "image": "Geometry3K/images/884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the area of the parallelogram and round to the nearest tenth if needed.\nChoices:\n(A) 41.0\n(B) 57.9\n(C) 70.9\n(D) 81.9" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.15625, "loss": 2.5311169624328613 }, { "id": "identity_85036", "image": "UniGeo/images/869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠OCE?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 31.078125, "loss": 1.8382987976074219 }, { "id": "identity_239699", "image": "ScienceQA/images/16569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the tertiary consumer in this food web and list all the organisms it directly preys upon" }, { "from": "gpt", "value": "The answer is Rough-legged hawk, snowy owl; short-tailed weasel, brown lemming" } ], "clip_score": 31.296875, "loss": 1.9488664865493774 }, { "id": "identity_212697", "image": "VQA2.0/images/440903.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are carrying surfboards in the image?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.34375, "loss": 2.3792660236358643 }, { "id": "identity_144105", "image": "FigureQA/images/47181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Dark Red section represent more than a quarter of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 36.84375, "loss": 3.1652638912200928 }, { "id": "identity_165256", "image": "GeoQA+/images/969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle A in triangle ABC?" }, { "from": "gpt", "value": "The answer is 65 degrees" } ], "clip_score": 35.28125, "loss": 2.267784595489502 }, { "id": "identity_35213", "image": "Super-CLEVR/images/superCLEVR_new_000737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a object that is both in front of the blue shiny object and behind the large shiny mountain bike; what shape is it?" }, { "from": "gpt", "value": "The answer is double bus" } ], "clip_score": 28.71875, "loss": 4.379169940948486 }, { "id": "identity_133009", "image": "A-OKVQA/images/497227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main subject of the painting?" }, { "from": "gpt", "value": "The answer is A landscape with trees and a figure" } ], "clip_score": 19.28125, "loss": 4.161678314208984 }, { "id": "identity_210251", "image": "TextVQA/images/3d1380bcf7b40e57.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the street name where one of the new markets is located?" }, { "from": "gpt", "value": "The answer is Rue Ontario" } ], "clip_score": 15.4609375, "loss": 3.5126826763153076 }, { "id": "identity_107592", "image": "VQA2.0/images/155309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is traffic instructed to go according to the road sign?" }, { "from": "gpt", "value": "The answer is Right turn only" } ], "clip_score": 29.078125, "loss": 2.8142001628875732 }, { "id": "identity_193578", "image": "TextVQA/images/2ce1837566e54662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phrase is displayed on the billboard with the basketball image?" }, { "from": "gpt", "value": "The answer is Wake up people" } ], "clip_score": 24.609375, "loss": 3.788208246231079 }, { "id": "identity_191730", "image": "PMC-VQA/images/PMC3349020_Fig1_137069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the patient's body is marked with an 'L'?" }, { "from": "gpt", "value": "The answer is The patient's left side" } ], "clip_score": 23.203125, "loss": 3.1171369552612305 }, { "id": "identity_1192", "image": "A-OKVQA/images/169576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What status is the person the people are waiting for?\nChoices:\n(A) non existent\n(B) medium\n(C) high\n(D) low" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.203125, "loss": 4.230738162994385 }, { "id": "identity_10624", "image": "FigureQA/images/90937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Mint less than Dark Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.4375, "loss": 3.642421007156372 }, { "id": "identity_141579", "image": "DVQA/images/bar_train_00061440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object category has the highest preference for the 'basis' category?" }, { "from": "gpt", "value": "The answer is damage" } ], "clip_score": 33.0625, "loss": 3.8159637451171875 }, { "id": "identity_119406", "image": "VQA2.0/images/426585.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is visible on the back of the person's shirt in the image?" }, { "from": "gpt", "value": "The answer is Canon" } ], "clip_score": 17.859375, "loss": 2.4928815364837646 }, { "id": "identity_77328", "image": "TQA/images/question_images/human_system_respiratory_6199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the organ that is protected by the ribcage in this image" }, { "from": "gpt", "value": "The answer is Lungs" } ], "clip_score": 32.28125, "loss": 2.544076919555664 }, { "id": "identity_338462", "image": "PMC-VQA/images/PMC3324384_F1_133533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the small dark cracks seen in images (F) correspond to?\nChoices:\n(A) Cross-sectional structures after being subjected to fire\n(B) Cross-sectional structures after being subjected to mechanical stress\n(C) Cross-sectional structures after being subjected to water\n(D) Cross-sectional structures after being subjected to corrosion" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.96875, "loss": 4.815402984619141 }, { "id": "identity_158007", "image": "TQA/images/abc_question_images/tides_12608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the side of the Earth experiencing a low tide?" }, { "from": "gpt", "value": "The answer is P" } ], "clip_score": 27.90625, "loss": 3.3152709007263184 }, { "id": "identity_171445", "image": "IconQA/images/3829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each triangle represents a value of 5, what is the total value represented by all the triangles?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 31.21875, "loss": 1.5985095500946045 }, { "id": "identity_250822", "image": "PMC-VQA/images/PMC5168473_fig3_106342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total area affected on the left ventral surface of the sacral ala on the axial plane of a STIR image, given that the affected region is a rectangle with a length of 2 cm and a width of 1.5 cm?" }, { "from": "gpt", "value": "The answer is 3 cm²" } ], "clip_score": 26.484375, "loss": 1.214671015739441 }, { "id": "identity_313978", "image": "TextVQA/images/f9d18252cce3fdd0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of this?" }, { "from": "gpt", "value": "The answer is europa pro lt" } ], "clip_score": 28.59375, "loss": 3.8737611770629883 }, { "id": "identity_81706", "image": "UniGeo/images/1076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if quadrilateral ABCD is inscribed in circle O and ∠GBC = 50°, what is the measure of ∠BGC?" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 34.8125, "loss": 1.9067398309707642 }, { "id": "identity_141620", "image": "ChartQA/images/multi_col_60755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the percentage for Subscription video on demand higher in Western Europe than in Total for Eastern Europe?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.6875, "loss": 2.9411914348602295 }, { "id": "identity_49098", "image": "DocVQA/images/lsvy0000_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the asterisk symbol (*) represent in the sales column?" }, { "from": "gpt", "value": "The answer is Partial week" } ], "clip_score": 20.6875, "loss": 4.079926490783691 }, { "id": "identity_231946", "image": "UniGeo/images/1015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C都在⊙O上,若∠AOB=76°,则∠ACB的度数为多少?请计算∠ACB的度数,并解释为什么它是这个度数。" }, { "from": "gpt", "value": "The answer is 38°" } ], "clip_score": 33.5, "loss": 2.9175100326538086 }, { "id": "identity_135950", "image": "DocVQA/images/tkpj0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number provided in the letterhead?" }, { "from": "gpt", "value": "The answer is 212 532/8840" } ], "clip_score": 21.75, "loss": 2.396096706390381 }, { "id": "identity_34488", "image": "TabMWP/images/2667.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some farmers compared how many sheep were in their flocks. What is the mean of the numbers?" }, { "from": "gpt", "value": "The answer is 46" } ], "clip_score": 34.0, "loss": 2.4351933002471924 }, { "id": "identity_13658", "image": "IconQA/images/1574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, which color are you more likely to pick?\nChoices:\n(A) neither; black and light blue are equally likely\n(B) black\n(C) light blue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.96875, "loss": 3.443549633026123 }, { "id": "identity_333130", "image": "IconQA/images/90852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the spinner more likely to land on?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.515625, "loss": 4.21157169342041 }, { "id": "identity_115338", "image": "TextVQA/images/df4af07fa8e2cadd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color theme of the album covers displayed in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.28125, "loss": 3.750152587890625 }, { "id": "identity_163906", "image": "UniGeo/images/1995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图:已知AD∥BE∥CF,且AB=4,BC=5,EF=4,求AE的长度是多少?" }, { "from": "gpt", "value": "The answer is 7.2" } ], "clip_score": 30.71875, "loss": 2.7143476009368896 }, { "id": "identity_88695", "image": "GeoQA+/images/1315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of DE is 2, what is the length of AC?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.359375, "loss": 2.7406818866729736 }, { "id": "identity_333664", "image": "VQA2.0/images/70794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is that?" }, { "from": "gpt", "value": "The answer is black bear" } ], "clip_score": 26.453125, "loss": 2.616395950317383 }, { "id": "identity_219971", "image": "VQA2.0/images/33572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is on the shorts of the person standing on the left?" }, { "from": "gpt", "value": "The answer is Camouflage" } ], "clip_score": 17.375, "loss": 2.4812395572662354 }, { "id": "identity_90901", "image": "Geometry3K/images/1404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If quadrilateral ABCD is inscribed in circle Z and angle BZA is 104 degrees, what is the measure of angle BCD?" }, { "from": "gpt", "value": "The answer is 104 degrees" } ], "clip_score": 38.09375, "loss": 1.3517022132873535 }, { "id": "identity_130262", "image": "Super-CLEVR/images/superCLEVR_new_000452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vehicle in the image that is not touching the ground?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.421875, "loss": 3.352813720703125 }, { "id": "identity_145826", "image": "DocVQA/images/rtxm0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the sixth topic in the third session?" }, { "from": "gpt", "value": "The answer is Expected factors affecting consumption of animal proteins" } ], "clip_score": 24.0, "loss": 3.0142714977264404 }, { "id": "identity_169213", "image": "TabMWP/images/4092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total votes were cast for bananas?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 27.546875, "loss": 2.1181468963623047 }, { "id": "identity_317775", "image": "UniGeo/images/2769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是半圆的直径,C和D是半圆上的两点,且D是AC的中点,连接AC,若∠B=70°,则∠DAB的度数是多少?\n选项:\n(A) 54°\n(B) 55°\n(C) 56°\n(D) 57°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.546875, "loss": 2.666395664215088 }, { "id": "identity_164529", "image": "GeoQA+/images/5696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of rectangle ABCD?" }, { "from": "gpt", "value": "The answer is Area = 4√3" } ], "clip_score": 28.75, "loss": 2.5533206462860107 }, { "id": "identity_294163", "image": "PMC-VQA/images/PMC5627306_Fig1_223950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining is depicted in (b) of the figure?\nChoices:\n(A) CD8 staining \n(B) MAC387 staining \n(C) HER2 staining \n(D) no staining shown" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.046875, "loss": 3.7020466327667236 }, { "id": "identity_43011", "image": "DVQA/images/bar_train_00022613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold for the item \"fury\" across all stores?" }, { "from": "gpt", "value": "The answer is 5.33" } ], "clip_score": 27.984375, "loss": 2.0662333965301514 }, { "id": "identity_297853", "image": "ChartQA/images/47990148004196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the total greater when adding the populations of Hungary and Turkey compared to Australia?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.703125, "loss": 2.8933868408203125 }, { "id": "identity_98534", "image": "TabMWP/images/9764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the wait times?" }, { "from": "gpt", "value": "The answer is 6 minutes" } ], "clip_score": 29.25, "loss": 2.881277561187744 }, { "id": "identity_38512", "image": "DocVQA/images/skww0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the program which spent $195,000 ?" }, { "from": "gpt", "value": "The answer is Honoraria" } ], "clip_score": 30.5, "loss": 5.279086589813232 }, { "id": "identity_102842", "image": "CLEVR-Math/images/CLEVR_train_065641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are either green or purple?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.4375, "loss": 2.3983664512634277 }, { "id": "identity_58900", "image": "DocVQA/images/hqnk0226_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average score of Brush over the years?" }, { "from": "gpt", "value": "The answer is 1.5" } ], "clip_score": 21.859375, "loss": 1.9786427021026611 }, { "id": "identity_259593", "image": "TextVQA/images/43c80607ec7aac0c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address and phone number of the truck?" }, { "from": "gpt", "value": "The answer is 1607 63rd St, Emeryville, CA 94608, (415) 992-7394" } ], "clip_score": 22.109375, "loss": 1.3817988634109497 }, { "id": "identity_42780", "image": "FigureQA/images/69098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Blue line ever reach a value of 10 on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.390625, "loss": 2.973371982574463 }, { "id": "identity_307945", "image": "TQA/images/question_images/seasons_2578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many seasonal configurations are there for this planet?\nChoices:\n(A) 1\n(B) 3\n(C) 4\n(D) 2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.328125, "loss": 3.225801467895508 }, { "id": "identity_243660", "image": "IconQA/images/95532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue shapes are not circles?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.78125, "loss": 2.7976999282836914 }, { "id": "identity_256274", "image": "DVQA/images/bar_train_00197282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the highest value to the lowest value for the 'salt' category in the chart?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 25.34375, "loss": 1.8190784454345703 }, { "id": "identity_323625", "image": "CLEVR-Math/images/CLEVR_train_015048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all small brown metal objects. Remove all small round objects. How many items remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.25, "loss": 2.3882763385772705 }, { "id": "identity_85807", "image": "GeoQA+/images/596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠DOF?" }, { "from": "gpt", "value": "The answer is 156°" } ], "clip_score": 33.09375, "loss": 2.667229175567627 }, { "id": "identity_146990", "image": "DVQA/images/bar_train_00044537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the 'dress' label have a consistent value across both groups?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.109375, "loss": 2.860008716583252 }, { "id": "identity_246446", "image": "TextVQA/images/e3089de36770108f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the department and the city where Hamilton Watch Company is located?" }, { "from": "gpt", "value": "The answer is Dept. 45, Lancaster" } ], "clip_score": 35.75, "loss": 2.641547918319702 }, { "id": "identity_60180", "image": "FigureQA/images/85053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Dark Orchid at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 23.25, "loss": 2.707857370376587 }, { "id": "identity_21601", "image": "TextVQA/images/decac8e03348ea64.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: it is time is 2.50?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.234375, "loss": 3.1492676734924316 }, { "id": "identity_131507", "image": "TextVQA/images/816d7cb9c3200b6c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the table on which the monitor and brochures are displayed?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.203125, "loss": 3.0980544090270996 }, { "id": "identity_283426", "image": "IconQA/images/31498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.125, "loss": 4.482545852661133 }, { "id": "identity_145045", "image": "DocVQA/images/ymvx0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for the VIP Lunch arrangement?" }, { "from": "gpt", "value": "The answer is Lyon" } ], "clip_score": 18.875, "loss": 4.877752780914307 }, { "id": "identity_304087", "image": "VQA2.0/images/376436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sign referring to individuals?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.765625, "loss": 3.574441909790039 }, { "id": "identity_242176", "image": "ScienceQA/images/19307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can Kamal and Jake trade to each get two items they want?" }, { "from": "gpt", "value": "The answer is Kamal's tomatoes for Jake's broccoli and Kamal's oranges for Jake's almonds" } ], "clip_score": 27.890625, "loss": 1.4971102476119995 }, { "id": "identity_312457", "image": "TextVQA/images/a25cf75ae0134b03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does it say about the range of fireworks?" }, { "from": "gpt", "value": "The answer is largest" } ], "clip_score": 26.765625, "loss": 3.9881591796875 }, { "id": "identity_134678", "image": "DVQA/images/bar_train_00135848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the Y-axis?" }, { "from": "gpt", "value": "The answer is period, holder, drink, speech, buffer" } ], "clip_score": 26.0, "loss": 2.5451900959014893 }, { "id": "identity_121707", "image": "Super-CLEVR/images/superCLEVR_new_001255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two wheels?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.390625, "loss": 2.3349781036376953 }, { "id": "identity_286255", "image": "VQA-AS/images/11181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the woman?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 14.734375, "loss": 3.720649242401123 }, { "id": "identity_233240", "image": "TQA/images/question_images/human_system_eye_6131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the retina and how does it interact with the optic nerve to process visual information?" }, { "from": "gpt", "value": "The answer is Converts light to signals, optic nerve transmits to brain" } ], "clip_score": 31.53125, "loss": 2.890029191970825 }, { "id": "identity_237822", "image": "DocVQA/images/skdv0228_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the net amount after deducting all the taxes from the total pay?" }, { "from": "gpt", "value": "The answer is $2134.72" } ], "clip_score": 20.671875, "loss": 2.297868251800537 }, { "id": "identity_66249", "image": "ScienceQA/images/17660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state is directly south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Missouri" } ], "clip_score": 33.03125, "loss": 3.217938184738159 }, { "id": "identity_155915", "image": "TQA/images/question_images/seasons_6289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tilt angle of the Earth's spin axis as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Approximately 23.5 degrees" } ], "clip_score": 29.75, "loss": 1.631516933441162 }, { "id": "identity_308255", "image": "IconQA/images/37984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show three activities Gabe did Friday before bed. Which activity did Gabe do last?\nChoices:\n(A) went for a walk\n(B) watched a movie\n(C) ate dinner" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.8125, "loss": 3.3755383491516113 }, { "id": "identity_119542", "image": "TextVQA/images/7853c5792f2cbadd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the product in the bottom left corner?" }, { "from": "gpt", "value": "The answer is TEN BANGS" } ], "clip_score": 9.1171875, "loss": 4.175565242767334 }, { "id": "identity_189527", "image": "VQA2.0/images/96877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any protective gear visible on the person performing the activity?" }, { "from": "gpt", "value": "The answer is Yes, a helmet" } ], "clip_score": 14.140625, "loss": 3.310960292816162 }, { "id": "identity_294834", "image": "UniGeo/images/2457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 小敏想知道校园内一棵大树的高度,她测得BC=10米,∠ACB=50°。树垂直于地面,请帮助她计算树高AB约为多少米。给定数据:sin50°≈0.77,cos50°≈0.64,tan50°≈1.2。\n选项:\n(A) 7.7\n(B) 8\n(C) 6.4\n(D) 12" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.796875, "loss": 2.388833999633789 }, { "id": "identity_121072", "image": "PMC-VQA/images/PMC6510869_Fig3_477973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the brain surface in the image?" }, { "from": "gpt", "value": "The answer is Ridged" } ], "clip_score": 28.859375, "loss": 3.3949978351593018 }, { "id": "identity_23928", "image": "PMC-VQA/images/PMC7869123_F2_363245.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging planes are used to depict the enhancement seen in the image?\nChoices:\n(A) Sagittal and axial \n(B) Coronal and axial \n(C) Sagittal and coronal \n(D) Transverse and oblique " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.734375, "loss": 3.6153767108917236 }, { "id": "identity_273463", "image": "FigureQA/images/97957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the highest in value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.90625, "loss": 4.127871036529541 }, { "id": "identity_313632", "image": "DVQA/images/bar_train_00007103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item is chosen by the fewest people in total across all categories?" }, { "from": "gpt", "value": "The answer is desk" } ], "clip_score": 28.84375, "loss": 2.9326603412628174 }, { "id": "identity_182530", "image": "VQA2.0/images/197351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is Two players" } ], "clip_score": 19.75, "loss": 4.518289566040039 }, { "id": "identity_14798", "image": "UniGeo/images/1265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点A、B、C在⊙O上,∠ABC=31°,过点C作⊙O的切线交OA的延长线于点D.则∠D的大小为()\nChoices:\n(A) 32°\n(B) 31°\n(C) 28°\n(D) 27°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.734375, "loss": 2.6504414081573486 }, { "id": "identity_193112", "image": "A-OKVQA/images/262119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the railings surrounding the skatepark?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.71875, "loss": 3.2302498817443848 }, { "id": "identity_225829", "image": "ScienceQA/images/12200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain the type of plate movement occurring at the Queen Charlotte Fault and describe the geological features or events that are typically associated with this type of boundary" }, { "from": "gpt", "value": "The answer is Earthquakes, fault lines, and lack of volcanic activity" } ], "clip_score": 39.84375, "loss": 3.005734443664551 }, { "id": "identity_96930", "image": "IconQA/images/18875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the pencil's length is the eraser cap?" }, { "from": "gpt", "value": "The answer is 1/6" } ], "clip_score": 35.59375, "loss": 1.6262809038162231 }, { "id": "identity_166623", "image": "IconQA/images/23729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many triangles are pointing upwards?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.5, "loss": 2.34659481048584 }, { "id": "identity_236713", "image": "PMC-VQA/images/PMC2213640_F1_16654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key features in image E that indicate it was obtained using SEM, and how do these features compare to those obtained using TEM and Light Microscopy?" }, { "from": "gpt", "value": "The answer is SEM features include 3D appearance, surface texture, and depth" } ], "clip_score": 28.4375, "loss": 3.655299186706543 }, { "id": "identity_231309", "image": "ScienceQA/images/15265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which city is the capital of the state that is located directly east of Utah and directly north of New Mexico?" }, { "from": "gpt", "value": "The answer is Denver" } ], "clip_score": 33.84375, "loss": 2.30849289894104 }, { "id": "identity_258812", "image": "A-OKVQA/images/457922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the helmet of the player who is standing in the batter's box, and what is the color of the umpire's helmet who is standing with his hand on the catcher's back?" }, { "from": "gpt", "value": "The answer is black and red" } ], "clip_score": 26.46875, "loss": 2.9899861812591553 }, { "id": "identity_40284", "image": "FigureQA/images/9726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Brown segment in the pie chart?" }, { "from": "gpt", "value": "The answer is 25%" } ], "clip_score": 32.5, "loss": 1.5872814655303955 }, { "id": "identity_332377", "image": "TabMWP/images/9160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many additional students are in Mr. Center's class compared to Mr. East's class?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.0625, "loss": 2.1351659297943115 }, { "id": "identity_157537", "image": "TQA/images/question_images/human_system_nervous_6179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which structure is directly below the Temporal Lobe in this diagram?" }, { "from": "gpt", "value": "The answer is Brainstem" } ], "clip_score": 33.59375, "loss": 1.9440078735351562 }, { "id": "identity_260160", "image": "VQA2.0/images/204021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding umbrellas in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.578125, "loss": 2.4747557640075684 }, { "id": "identity_300212", "image": "FigureQA/images/82539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Indigo's value the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.96875, "loss": 4.0627264976501465 }, { "id": "identity_182165", "image": "A-OKVQA/images/132141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 14.234375, "loss": 2.8107006549835205 }, { "id": "identity_180219", "image": "CLEVR-Math/images/CLEVR_train_049680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are green?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.71875, "loss": 2.417807102203369 }, { "id": "identity_93761", "image": "IconQA/images/19245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this shape a polyhedron?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.234375, "loss": 3.4735426902770996 }, { "id": "identity_266978", "image": "AI2D/images/3322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does C represent in the diagram provided?\nOptions:\n(A) cytoskeleton\n(B) none of the above\n(C) golgi body\n(D) ribosome" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.671875, "loss": 3.914203643798828 }, { "id": "identity_247042", "image": "TextVQA/images/f1cc427fa186604e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different lead sizes available for the Campus Junior Pencil, and what is the recommended lead size for beginners?" }, { "from": "gpt", "value": "The answer is 1.3 mm, 0.9 mm, 0.5 mm; recommended size is 1.3 mm" } ], "clip_score": 30.265625, "loss": 1.68354070186615 }, { "id": "identity_271139", "image": "DVQA/images/bar_train_00117485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total accuracy when combining the accuracies of the algorithms judge and ocean?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 40.84375, "loss": 2.4963912963867188 }, { "id": "identity_53999", "image": "DVQA/images/bar_train_00164898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'book' and 'phrase' datasets for the 'shape' algorithm?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.875, "loss": 3.386113405227661 }, { "id": "identity_92929", "image": "TabMWP/images/8033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which game begins immediately after the Vipers versus Crocodiles game according to the schedule?" }, { "from": "gpt", "value": "The answer is Thunder vs Blue Jays" } ], "clip_score": 28.984375, "loss": 2.017557144165039 }, { "id": "identity_334620", "image": "TextVQA/images/034bfc58141b6162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is that train sponsored by this company?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.25, "loss": 2.8133506774902344 }, { "id": "identity_149023", "image": "DocVQA/images/yscw0217_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the due date for the draft mentioned in the document?" }, { "from": "gpt", "value": "The answer is July 23, 2001" } ], "clip_score": 27.28125, "loss": 1.6817967891693115 }, { "id": "identity_301168", "image": "ScienceQA/images/4763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the highlighted country?\nChoices:\n(A) Dominica\n(B) the Dominican Republic\n(C) Saint Vincent and the Grenadines\n(D) Saint Kitts and Nevis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.296875, "loss": 3.8223769664764404 }, { "id": "identity_287556", "image": "DocVQA/images/mymk0226_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of this sheet?" }, { "from": "gpt", "value": "The answer is research and evaluation" } ], "clip_score": 26.0625, "loss": 4.513519287109375 }, { "id": "identity_326028", "image": "PMC-VQA/images/PMC6686551_Fig2_21334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What distinguishes the two X-ray images?\nChoices:\n(A) The patient's position in the X-ray machine\n(B) The kind of X-ray machine used\n(C) The time when the X-ray was taken\n(D) The patient's medical condition" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.703125, "loss": 3.6422996520996094 }, { "id": "identity_228190", "image": "DVQA/images/bar_train_00059146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total people preferred the object phase across all the categories, and what is the average number of people who preferred each object across all categories?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 34.03125, "loss": 3.2911617755889893 }, { "id": "identity_13388", "image": "VizWiz/images/VizWiz_train_00008283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do you see on my desk?" }, { "from": "gpt", "value": "The answer is laptop remote electronics food" } ], "clip_score": 26.890625, "loss": 4.684682846069336 }, { "id": "identity_274895", "image": "UniGeo/images/1905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, the bisectors of the two exterior angles ∠CBE and ∠CDF of quadrilateral ABCD intersect at point G. If ∠A = 52° and ∠DGB = 28°, what is the measure of ∠DCB?\nChoices:\n(A) 152°\n(B) 128°\n(C) 108°\n(D) 80°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.375, "loss": 2.1555848121643066 }, { "id": "identity_259255", "image": "IconQA/images/58101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total perimeter of all the dots combined if each dot has a diameter of 2 units?" }, { "from": "gpt", "value": "The answer is \\( 6\\pi \\) units" } ], "clip_score": 24.515625, "loss": 2.96136474609375 }, { "id": "identity_163701", "image": "UniGeo/images/1051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,A,B,C三点在⊙O上,若∠BOC=130°,则∠BAC等于多少度?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 33.6875, "loss": 2.1202735900878906 }, { "id": "identity_315344", "image": "DVQA/images/bar_train_00175329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the fourth bar from the left in these groups?" }, { "from": "gpt", "value": "The answer is pure" } ], "clip_score": 17.921875, "loss": 4.096346378326416 }, { "id": "identity_178937", "image": "TabMWP/images/11993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of two pieces of smoky quartz and one piece of copper?" }, { "from": "gpt", "value": "The answer is $1.20" } ], "clip_score": 31.46875, "loss": 2.088192939758301 }, { "id": "identity_266572", "image": "UniGeo/images/1823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If triangle △ABC is rotated clockwise 35° around point C to form △A'B'C, where A'B' intersects AC at point D and ∠A'DC = 90°, what is the measure of ∠A in degrees?\n(A) 35°\n(B) 65°\n(C) 55°\n(D) 25°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.75, "loss": 2.4717326164245605 }, { "id": "identity_21541", "image": "PMC-VQA/images/PMC6519979_FIG2_480536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to capture the image? \nChoices:\n(A) X-ray imaging \n(B) Computed Tomography (CT) \n(C) Magnetic Resonance Imaging (MRI) \n(D) Positron Emission Tomography (PET) " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.8125, "loss": 3.9428272247314453 }, { "id": "identity_72645", "image": "AI2D/images/201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the producer in this food web" }, { "from": "gpt", "value": "The answer is TREES" } ], "clip_score": 31.140625, "loss": 2.1206796169281006 }, { "id": "identity_41402", "image": "DocVQA/images/sphv0228_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the figure number mentioned at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Fig. 5" } ], "clip_score": 17.84375, "loss": 2.0398781299591064 }, { "id": "identity_337001", "image": "FigureQA/images/49961.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.6640625, "loss": 4.052105903625488 }, { "id": "identity_298984", "image": "Super-CLEVR/images/superCLEVR_new_000637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of other objects with the same shape as the green object?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 15.359375, "loss": 2.709233522415161 }, { "id": "identity_287097", "image": "DocVQA/images/gryc0227_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who holds the position of chairman in the curriculum committee?" }, { "from": "gpt", "value": "The answer is Whit Carhart" } ], "clip_score": 22.375, "loss": 2.9482035636901855 }, { "id": "identity_83154", "image": "UniGeo/images/1767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If OC is the bisector of angle AOB and angle AOC is 75 degrees, what is the measure of angle AOB?" }, { "from": "gpt", "value": "The answer is 150 degrees" } ], "clip_score": 32.65625, "loss": 1.7683629989624023 }, { "id": "identity_33468", "image": "A-OKVQA/images/190393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is attached to the chair?\nChoices:\n(A) knives\n(B) apples\n(C) wheels\n(D) balloons" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5390625, "loss": 4.365539073944092 }, { "id": "identity_89275", "image": "GEOS/images/045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the length of side BD to the length of side BC in the diagram above?" }, { "from": "gpt", "value": "The answer is 1:5" } ], "clip_score": 27.25, "loss": 1.3413454294204712 }, { "id": "identity_73963", "image": "AI2D/images/4135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the diagram, what is produced during respiration besides energy?" }, { "from": "gpt", "value": "The answer is Water Vapor" } ], "clip_score": 34.875, "loss": 2.3188323974609375 }, { "id": "identity_325024", "image": "DocVQA/images/zlmf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is this?" }, { "from": "gpt", "value": "The answer is 7/28/77" } ], "clip_score": 14.9921875, "loss": 1.8744663000106812 }, { "id": "identity_89211", "image": "UniGeo/images/133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if the length of AB is 10 and the length of AG is 8, what is the length of segment GC?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 34.28125, "loss": 2.4835081100463867 }, { "id": "identity_73797", "image": "TQA/images/question_images/parts_microscope_7190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objective lenses are present on this microscope?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 34.78125, "loss": 4.173919677734375 }, { "id": "identity_209880", "image": "Super-CLEVR/images/superCLEVR_new_001927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the right of the purple plane and left of the shiny bus?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 31.21875, "loss": 4.597157955169678 }, { "id": "identity_60317", "image": "FigureQA/images/84751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color that has the highest y-axis value?" }, { "from": "gpt", "value": "The answer is Sky Blue" } ], "clip_score": 25.84375, "loss": 2.9043378829956055 }, { "id": "identity_47754", "image": "DocVQA/images/hgcg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main agenda item to be reviewed in the meeting?" }, { "from": "gpt", "value": "The answer is Review of Procedure Note" } ], "clip_score": 25.421875, "loss": 4.456608295440674 }, { "id": "identity_203038", "image": "A-OKVQA/images/283290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the background?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 21.6875, "loss": 2.347560405731201 }, { "id": "identity_300686", "image": "VizWiz/images/VizWiz_train_00013489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this object?" }, { "from": "gpt", "value": "The answer is toy" } ], "clip_score": 23.015625, "loss": 2.8209235668182373 }, { "id": "identity_206642", "image": "A-OKVQA/images/477647.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the refrigerator interior light?" }, { "from": "gpt", "value": "The answer is White light" } ], "clip_score": 27.28125, "loss": 3.2695436477661133 }, { "id": "identity_170656", "image": "IconQA/images/58378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the hour hand and the minute hand?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 24.25, "loss": 2.0042033195495605 }, { "id": "identity_68044", "image": "ScienceQA/images/20344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of the animal's feet as seen in the image?" }, { "from": "gpt", "value": "The answer is Digging" } ], "clip_score": 27.3125, "loss": 3.561225652694702 }, { "id": "identity_64950", "image": "ScienceQA/images/14856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the colony that is north of Virginia?" }, { "from": "gpt", "value": "The answer is Maryland" } ], "clip_score": 28.734375, "loss": 3.819096803665161 }, { "id": "identity_169417", "image": "IconQA/images/63627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more saddle is added to the image, how many saddles will there be in total?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.21875, "loss": 2.6178534030914307 }, { "id": "identity_329763", "image": "PMC-VQA/images/PMC8056102_Fig16_420908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the individual's condition?\nChoices:\n(A) Broken bones\n(B) Viral pneumonia\n(C) Appendicitis\n(D) Stomach ulcer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.248889923095703 }, { "id": "identity_135134", "image": "PlotQA/images/56884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did the aid flow from UN agencies to the World increase or decrease from 2009 to 2012?" }, { "from": "gpt", "value": "The answer is Decrease" } ], "clip_score": 29.390625, "loss": 2.548006534576416 }, { "id": "identity_99689", "image": "TabMWP/images/23704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of rounds in the spelling bee from 2008 to 2012?" }, { "from": "gpt", "value": "The answer is 69" } ], "clip_score": 40.9375, "loss": 2.2739617824554443 }, { "id": "identity_175274", "image": "IconQA/images/64822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more ovals or rectangles in the image?" }, { "from": "gpt", "value": "The answer is More ovals" } ], "clip_score": 21.3125, "loss": 2.14083194732666 }, { "id": "identity_192736", "image": "PMC-VQA/images/PMC7024228_molecules-25-00403-f004_119143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accelerating voltage used for this microscopic image?" }, { "from": "gpt", "value": "The answer is 5.00 kilovolts" } ], "clip_score": 27.078125, "loss": 1.5689399242401123 }, { "id": "identity_201373", "image": "PMC-VQA/images/PMC6194021_Fig4_383711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the images of the same magnification?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.25, "loss": 2.993455410003662 }, { "id": "identity_33516", "image": "PlotQA/images/132274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the under-five mortality rate in Belarus in 2010 ?" }, { "from": "gpt", "value": "The answer is 5.2" } ], "clip_score": 27.375, "loss": 1.9391638040542603 }, { "id": "identity_113216", "image": "Super-CLEVR/images/superCLEVR_new_001172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is The yellow bicycle" } ], "clip_score": 24.28125, "loss": 2.6946969032287598 }, { "id": "identity_54406", "image": "DVQA/images/bar_train_00076853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the \"ratio\" and \"unit\" algorithms for the \"break\" dataset?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.125, "loss": 3.0532541275024414 }, { "id": "identity_272916", "image": "CLEVR-Math/images/CLEVR_train_005507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cubes remain after removing all the purple metallic cubes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.96875, "loss": 2.2500874996185303 }, { "id": "identity_130340", "image": "A-OKVQA/images/406710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible price tag on the table with pastries?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.890625, "loss": 2.932405471801758 }, { "id": "identity_31002", "image": "VQA2.0/images/406282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the placemats?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 21.84375, "loss": 3.691941022872925 }, { "id": "identity_75380", "image": "AI2D/images/1133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves are visible on the plant?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 26.796875, "loss": 4.522341728210449 }, { "id": "identity_26398", "image": "VQA2.0/images/408207.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are in the photo?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 13.5, "loss": 2.37552809715271 }, { "id": "identity_299926", "image": "A-OKVQA/images/188955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which hemisphere are most of these sports establishments situated?\nChoices:\n(A) northern\n(B) southern\n(C) eastern\n(D) western" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.109375, "loss": 4.005253791809082 }, { "id": "identity_46130", "image": "FigureQA/images/22732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 33.65625, "loss": 3.8453774452209473 }, { "id": "identity_109997", "image": "TextVQA/images/57b18b2387e1a9ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the studio mentioned on the sign in the image?" }, { "from": "gpt", "value": "The answer is Gary Fields Studios" } ], "clip_score": 22.015625, "loss": 2.263096809387207 }, { "id": "identity_130502", "image": "A-OKVQA/images/207513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the base of the sign?" }, { "from": "gpt", "value": "The answer is Alternating blue and white stripes" } ], "clip_score": 25.171875, "loss": 2.6479485034942627 }, { "id": "identity_4337", "image": "GeoQA+/images/5840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,在Rt△ABC中,分别以三角形的三条边为边向外作正方形,面积分别记为S1,S2,S3,若S1=7,S2=24,则S3的值为()\nChoices:\n(A) 17\n(B) 20\n(C) 25\n(D) 31" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.53125, "loss": 2.3146042823791504 }, { "id": "identity_109751", "image": "A-OKVQA/images/197959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the countertop in the kitchen?" }, { "from": "gpt", "value": "The answer is Brown and beige" } ], "clip_score": 26.765625, "loss": 3.396470308303833 }, { "id": "identity_255887", "image": "ScienceQA/images/2404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which property matches this object, and what material is it made of?" }, { "from": "gpt", "value": "The answer is Hard, ceramic" } ], "clip_score": 21.765625, "loss": 3.676650047302246 }, { "id": "identity_62602", "image": "MapQA/images/map_3598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for the state of Texas?" }, { "from": "gpt", "value": "The answer is 75-86" } ], "clip_score": 25.859375, "loss": 2.3440840244293213 }, { "id": "identity_27027", "image": "TextVQA/images/1c1e6212320b3c5d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the umpires number?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.671875, "loss": 2.430413246154785 }, { "id": "identity_89820", "image": "UniGeo/images/1909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of angles A' and B' in the folded quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is 190°" } ], "clip_score": 35.65625, "loss": 2.0310211181640625 }, { "id": "identity_209058", "image": "A-OKVQA/images/472828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of wheel does the vendor's cart have?" }, { "from": "gpt", "value": "The answer is Spoked wheel" } ], "clip_score": 19.296875, "loss": 2.8306357860565186 }, { "id": "identity_170610", "image": "TabMWP/images/17812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units does the y-value decrease for each increase of 1 unit in x-value?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.46875, "loss": 2.3320324420928955 }, { "id": "identity_149201", "image": "DocVQA/images/rkwg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tenure group of the employee?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.34375, "loss": 3.329633951187134 }, { "id": "identity_283219", "image": "IconQA/images/92983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the crayon in cubes? The crayon measures (_) cubes in length." }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.203125, "loss": 1.8412400484085083 }, { "id": "identity_243934", "image": "IconQA/images/7897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the third picture if the first picture is a moon and the second picture is a star?" }, { "from": "gpt", "value": "The answer is rocket" } ], "clip_score": 29.453125, "loss": 2.832630157470703 }, { "id": "identity_74428", "image": "ScienceQA/images/11699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of light source as indicated by the shadow on the balloon?" }, { "from": "gpt", "value": "The answer is From the top left corner" } ], "clip_score": 29.578125, "loss": 3.355658531188965 }, { "id": "identity_226655", "image": "PMC-VQA/images/PMC7439464_fig4_238121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key structural differences observed in the image that indicate it was captured using transmission electron microscopy, and how do these differences compare to images captured using the other microscopy techniques listed?" }, { "from": "gpt", "value": "The answer is Detailed internal structures and high resolution indicate TEM" } ], "clip_score": 27.1875, "loss": 4.271275997161865 }, { "id": "identity_240753", "image": "TabMWP/images/6251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Naomi missed the 9:45 A.M. train at Richmond and wants to reach Seaside as soon as possible. What is the earliest time she can arrive at Seaside if she takes the next available train from Richmond?" }, { "from": "gpt", "value": "The answer is 12:00 P.M" } ], "clip_score": 30.296875, "loss": 1.4133599996566772 }, { "id": "identity_177696", "image": "IconQA/images/101354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Luke sets the alarm to ring after 30 minutes, what time will the alarm ring?" }, { "from": "gpt", "value": "The answer is 8:30" } ], "clip_score": 30.4375, "loss": 1.3672233819961548 }, { "id": "identity_176910", "image": "TabMWP/images/16090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Alex need to buy a bottle of perfume and a bottle of shampoo?" }, { "from": "gpt", "value": "The answer is $5.55" } ], "clip_score": 31.484375, "loss": 1.997222900390625 }, { "id": "identity_189697", "image": "PMC-VQA/images/PMC6020456_fig2_333757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What condition is indicated by the presence of the arrows?" }, { "from": "gpt", "value": "The answer is Osteomas" } ], "clip_score": 25.28125, "loss": 4.179623603820801 }, { "id": "identity_266970", "image": "Super-CLEVR/images/superCLEVR_new_000695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there a smaller number of small green matte minivans compared to sedans?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 28.421875, "loss": 5.039587497711182 }, { "id": "identity_172038", "image": "IconQA/images/69908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fourth picture?" }, { "from": "gpt", "value": "The answer is Teddy bear" } ], "clip_score": 13.1796875, "loss": 2.703989267349243 }, { "id": "identity_88686", "image": "GeoQA+/images/4197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线a∥b,∠1=85°,∠2=30°, what is the measure of ∠4?" }, { "from": "gpt", "value": "The answer is 85°" } ], "clip_score": 28.65625, "loss": 1.6831506490707397 }, { "id": "identity_325552", "image": "FigureQA/images/98804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.15625, "loss": 4.189608573913574 }, { "id": "identity_214495", "image": "Super-CLEVR/images/superCLEVR_new_001616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more purple objects or green objects in the image?" }, { "from": "gpt", "value": "The answer is More purple objects" } ], "clip_score": 28.8125, "loss": 2.3452064990997314 }, { "id": "identity_209046", "image": "PMC-VQA/images/PMC7003845_vop12710-fig-0002_112141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the asterisk (*) in the image?" }, { "from": "gpt", "value": "The answer is It indicates a specific area of interest or abnormality" } ], "clip_score": 21.890625, "loss": 3.169562339782715 }, { "id": "identity_141513", "image": "ChartQA/images/10595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of U.S. adults who have changed their use of social media sites \"a great deal\" or \"somewhat\" after hearing about government surveillance programs?" }, { "from": "gpt", "value": "The answer is 15%" } ], "clip_score": 33.03125, "loss": 1.568506121635437 }, { "id": "identity_102101", "image": "TabMWP/images/8513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more new customers did the cable company have on Friday compared to Sunday?" }, { "from": "gpt", "value": "The answer is 345" } ], "clip_score": 35.46875, "loss": 2.643622398376465 }, { "id": "identity_207571", "image": "PMC-VQA/images/PMC7008751_FIG2_113810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct circular shapes are visible in the non-blurred portion of the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.75, "loss": 4.353660583496094 }, { "id": "identity_51313", "image": "MapQA/images/map_10507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Florida greater than 75?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.53125, "loss": 3.689033031463623 }, { "id": "identity_21140", "image": "PMC-VQA/images/PMC3522352_fig192_172905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the area of the canal depend on? \nChoices:\n(A) AP diameter \n(B) longitudinal length \n(C) opening width \n(D) distance between the openings " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.171875, "loss": 4.375450611114502 }, { "id": "identity_94506", "image": "TabMWP/images/23972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the trend of cheese consumption increase continues, what would be the projected cheese consumption per capita for Canada in 2016, assuming the same rate of increase?" }, { "from": "gpt", "value": "The answer is 12 kg" } ], "clip_score": 30.6875, "loss": 2.163469076156616 }, { "id": "identity_54557", "image": "DVQA/images/bar_train_00115763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the item brass have more than 5 units sold?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.703125, "loss": 3.110445499420166 }, { "id": "identity_106338", "image": "VizWiz/images/VizWiz_train_00016968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the object placed to the left of the water bottle?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 32.875, "loss": 4.03870964050293 }, { "id": "identity_86537", "image": "UniGeo/images/969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C在⊙O上,AC∥OB,∠BAO=25°,则∠OAC的度数为()" }, { "from": "gpt", "value": "The answer is 77.5°" } ], "clip_score": 34.4375, "loss": 1.8412284851074219 }, { "id": "identity_93606", "image": "TabMWP/images/25666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many events are scheduled to take place in the afternoon?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 28.28125, "loss": 2.47182559967041 }, { "id": "identity_284179", "image": "PMC-VQA/images/PMC8876035_jcm-11-01002-f001_205890.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen in the midbrain on axial FLAIR images?\nChoices:\n(A) Decreased signals \n(B) No change in signals \n(C) Increased signals \n(D) Irregular signals" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.765625, "loss": 4.204922676086426 }, { "id": "identity_57026", "image": "DVQA/images/bar_train_00127126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value difference between \"wave\" and \"sorry\" in the \"tire\" category?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 24.90625, "loss": 2.428027629852295 }, { "id": "identity_267158", "image": "AI2D/images/3716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label correctly identifies L among the given choices: Appendix, Esophagus, Oral cavity, or Stomach?\nChoices:\n(A) Appendix\n(B) Esophagus\n(C) Oral cavity\n(D) Stomach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.109375, "loss": 4.497365474700928 }, { "id": "identity_164378", "image": "GeoQA+/images/105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the exterior angle adjacent to the 45-degree angle in the larger triangle?" }, { "from": "gpt", "value": "The answer is 135 degrees" } ], "clip_score": 36.28125, "loss": 1.6901062726974487 }, { "id": "identity_254559", "image": "MapQA/images/map_7090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of pickers between 39.6% and 47.4%?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 36.1875, "loss": 2.3393924236297607 }, { "id": "identity_106906", "image": "VQA2.0/images/486711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the flying object in the sky?" }, { "from": "gpt", "value": "The answer is Star-shaped" } ], "clip_score": 19.890625, "loss": 2.7209432125091553 }, { "id": "identity_141351", "image": "ChartQA/images/multi_col_936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which time period had the highest Sunday circulation?" }, { "from": "gpt", "value": "The answer is January to December 2016" } ], "clip_score": 24.875, "loss": 1.8574029207229614 }, { "id": "identity_15092", "image": "TabMWP/images/10349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The members of the science club shared how many times they had been to the zoo last year. How many members went to the zoo more than 4 times?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 35.03125, "loss": 2.3509812355041504 }, { "id": "identity_238629", "image": "VizWiz/images/VizWiz_train_00019475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many head stones are there in the image, and what is the average height of the head stones if the tallest one is 1.5 meters and the shortest one is 0.5 meters?" }, { "from": "gpt", "value": "The answer is 24 head stones, average height is 1.0 meter" } ], "clip_score": 28.984375, "loss": 1.8945115804672241 }, { "id": "identity_337356", "image": "IconQA/images/34740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show three activities Carla did Sunday evening. Which activity did Carla do last?\nChoices:\n(A) listened to music\n(B) practiced piano\n(C) went for a walk" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 39.375, "loss": 3.6321825981140137 }, { "id": "identity_108461", "image": "TextVQA/images/4cf1cb2759735961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sailboats are visible in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 25.65625, "loss": 2.449578046798706 }, { "id": "identity_288824", "image": "A-OKVQA/images/140990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What significance can be associated with the number in terms of its meaning?\nChoices:\n(A) lucky\n(B) unlucky\n(C) unholy\n(D) holy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 6.05078125, "loss": 4.25413703918457 }, { "id": "identity_290471", "image": "TQA/images/question_images/human_system_nervous_6191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the brain is situated at the front?\nChoices:\n(A) Lateral sulcus\n(B) Parietal lobe\n(C) Frontal lobe\n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.09375, "loss": 4.354151725769043 }, { "id": "identity_114651", "image": "A-OKVQA/images/414999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.4375, "loss": 2.4624736309051514 }, { "id": "identity_268989", "image": "FigureQA/images/17189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Orchid or Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.3125, "loss": 3.913644313812256 }, { "id": "identity_93860", "image": "TabMWP/images/33792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the photographer take the least number of pictures?" }, { "from": "gpt", "value": "The answer is Thursday" } ], "clip_score": 35.34375, "loss": 2.142939805984497 }, { "id": "identity_311190", "image": "AI2D/images/35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter on the diagram indicates the egg stage?\nChoices:\n(A) B\n(B) D\n(C) C\n(D) A" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.75, "loss": 3.5096497535705566 }, { "id": "identity_208939", "image": "VQA2.0/images/125351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the curbs on the side of the street?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.359375, "loss": 3.0585384368896484 }, { "id": "identity_8655", "image": "GeoQA+/images/1758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,C、A是⊙O上的点,∠CAB=20°,过点C作⊙O的切线交AB的延长线于点D,则∠D等于()\nChoices:\n(A) 40°\n(B) 50°\n(C) 60°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.890625, "loss": 2.7046921253204346 }, { "id": "identity_122431", "image": "VQA-RAD/images/synpic34054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality is used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 31.84375, "loss": 2.3819618225097656 }, { "id": "identity_179880", "image": "IconQA/images/43460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of rectangles to squares?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 31.6875, "loss": 1.6180872917175293 }, { "id": "identity_71845", "image": "TQA/images/question_images/radioactive_decay_8177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mass number of the daughter nucleus after the alpha decay of Uranium-238?" }, { "from": "gpt", "value": "The answer is 234" } ], "clip_score": 35.71875, "loss": 1.4695689678192139 }, { "id": "identity_249840", "image": "FigureQA/images/93701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the area under the curve between Burlywood and Dark Khaki?" }, { "from": "gpt", "value": "The answer is 250" } ], "clip_score": 24.8125, "loss": 2.2118687629699707 }, { "id": "identity_257876", "image": "IconQA/images/48009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sequence of transformations (slide, flip, turn) can be applied to the letter to achieve the final position shown in the image?" }, { "from": "gpt", "value": "The answer is Slide to the right" } ], "clip_score": 30.625, "loss": 2.7598915100097656 }, { "id": "identity_11792", "image": "ChartQA/images/two_col_81092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the no.1 cause in the chart?" }, { "from": "gpt", "value": "The answer is Shooting" } ], "clip_score": 23.109375, "loss": 2.6774652004241943 }, { "id": "identity_87123", "image": "GEOS/images/017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 80 degrees" } ], "clip_score": 33.25, "loss": 1.6563200950622559 }, { "id": "identity_107507", "image": "PMC-VQA/images/PMC3229513_pone-0027983-g007_118113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the labeled structure indicated by the arrow marked 'Vp'?" }, { "from": "gpt", "value": "The answer is Vascular pore" } ], "clip_score": 31.46875, "loss": 3.219902515411377 }, { "id": "identity_175601", "image": "IconQA/images/45963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one triangle is removed, how many sides will remain in total?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.5625, "loss": 2.2433040142059326 }, { "id": "identity_30548", "image": "PlotQA/images/29654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the 1st bar from the bottom in 2009 represents ?" }, { "from": "gpt", "value": "The answer is Competing against unregistered firms" } ], "clip_score": 26.453125, "loss": 2.1492879390716553 }, { "id": "identity_201163", "image": "VQA2.0/images/305703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many donuts can be seen on the conveyor belt?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 28.15625, "loss": 2.034371852874756 }, { "id": "identity_142700", "image": "DocVQA/images/hhbv0228_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the public/private ratio for 4-Year Institutions in the fall of 1976?" }, { "from": "gpt", "value": "The answer is 29/71" } ], "clip_score": 30.328125, "loss": 1.9564476013183594 }, { "id": "identity_324105", "image": "DocVQA/images/pnbx0223_74.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Furnace Oil (Litre) for this period?" }, { "from": "gpt", "value": "The answer is 34" } ], "clip_score": 12.4609375, "loss": 2.6521949768066406 }, { "id": "identity_163333", "image": "Geometry3K/images/1388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment JL?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.75, "loss": 2.6360182762145996 }, { "id": "identity_163424", "image": "GeoQA+/images/5648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle DEC in the given rhombus ABCD if angle DAE equals 80 degrees?" }, { "from": "gpt", "value": "The answer is 130 degrees" } ], "clip_score": 32.90625, "loss": 2.0839414596557617 }, { "id": "identity_176416", "image": "IconQA/images/105078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides for all the shapes combined?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 26.84375, "loss": 1.6439236402511597 }, { "id": "identity_29592", "image": "PMC-VQA/images/PMC7600865_medicina-56-00534-f002_287455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the size and location of the lesion seen in the CT scan? \nChoices:\n(A) 5 mm lesion near the AS joint. \n(B) 18 mm lesion near the SI joint. \n(C) 20 mm lesion near the AI joint. \n(D) 25 mm lesion near the PI joint. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 3.708895444869995 }, { "id": "identity_216043", "image": "TextVQA/images/a9f3b1fa9c7caabd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible riding bicycles in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.625, "loss": 4.227687835693359 }, { "id": "identity_104945", "image": "VQA2.0/images/105762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of knife is on the table?" }, { "from": "gpt", "value": "The answer is Kitchen knife" } ], "clip_score": 15.765625, "loss": 2.266721487045288 }, { "id": "identity_78405", "image": "TQA/images/abc_question_images/parts_telescope_18152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'U' signify in the context of this telescope diagram?" }, { "from": "gpt", "value": "The answer is Mount/Stand" } ], "clip_score": 27.203125, "loss": 5.4409003257751465 }, { "id": "identity_114599", "image": "VizWiz/images/VizWiz_train_00005045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the object in the image new or does it show signs of wear?" }, { "from": "gpt", "value": "The answer is It shows signs of wear" } ], "clip_score": 18.09375, "loss": 2.693042755126953 }, { "id": "identity_258250", "image": "A-OKVQA/images/437967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main ingredients in the dish, including the meat and vegetables?" }, { "from": "gpt", "value": "The answer is Chicken, green leafy vegetables, red bell pepper, yellow bell pepper" } ], "clip_score": 32.625, "loss": 1.8689556121826172 }, { "id": "identity_293366", "image": "PMC-VQA/images/PMC3576199_f1-ol-05-03-0840_186418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of carcinoma was identified in the CT scan?\nOptions:\n(A) Small cell carcinoma\n(B) Adenocarcinoma\n(C) Squamous cell carcinoma\n(D) Large cell carcinoma" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.15625, "loss": 3.9882900714874268 }, { "id": "identity_192181", "image": "TextVQA/images/d6e8bc5b8ee5d66e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit is visible in the image?" }, { "from": "gpt", "value": "The answer is Apples" } ], "clip_score": 10.0078125, "loss": 2.915898084640503 }, { "id": "identity_76711", "image": "TQA/images/question_images/parts_ocean_floor_7240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many features labeled in the image are part of the continental margin?" }, { "from": "gpt", "value": "The answer is three (continental shelf, continental slope, abyss)" } ], "clip_score": 34.0, "loss": 2.2236557006835938 }, { "id": "identity_41501", "image": "DocVQA/images/lgbv0228_37.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year did ANN GRANDJEAN start her role as a Lecturer at Creighton University College of Nursing?" }, { "from": "gpt", "value": "The answer is 1972" } ], "clip_score": 24.5, "loss": 1.5095294713974 }, { "id": "identity_1290", "image": "AI2D/images/2285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many stages in the butterfly life cycle does this diagram show?\nChoices:\n(A) 5\n(B) 6\n(C) 3\n(D) 4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.234375, "loss": 2.6328978538513184 }, { "id": "identity_196868", "image": "A-OKVQA/images/496313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What lane is the bus currently driving in?" }, { "from": "gpt", "value": "The answer is B Lane" } ], "clip_score": 23.796875, "loss": 3.5681629180908203 }, { "id": "identity_74141", "image": "ScienceQA/images/10741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the image is a primary producer?" }, { "from": "gpt", "value": "The answer is Bear sedge, bilberry, and lichen" } ], "clip_score": 23.015625, "loss": 2.6362826824188232 }, { "id": "identity_260004", "image": "TextVQA/images/f0cb340ffd238456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the full names and corresponding instruments of the performers listed on this paper?" }, { "from": "gpt", "value": "The answer is Siegfried Behrend, Gitarre; Nicanor Zabaleta, Harfe" } ], "clip_score": 21.078125, "loss": 1.6911596059799194 }, { "id": "identity_105701", "image": "A-OKVQA/images/451364.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vegetation visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is a tree" } ], "clip_score": 9.28125, "loss": 2.967503786087036 }, { "id": "identity_107164", "image": "TextVQA/images/27f176045bc0021a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tail number of this aircraft?" }, { "from": "gpt", "value": "The answer is 51-6089" } ], "clip_score": 25.78125, "loss": 2.4383175373077393 }, { "id": "identity_249367", "image": "AI2D/images/368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain the process that leads to the formation of intrusive igneous rock and how it causes a slow uplift to the surface, including the role of magma and the subsequent geological changes" }, { "from": "gpt", "value": "The answer is Magma rises, cools, solidifies, causing uplift and exposure" } ], "clip_score": 36.96875, "loss": 2.390726089477539 }, { "id": "identity_54456", "image": "FigureQA/images/29538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Purple section the smallest in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 36.90625, "loss": 3.3584117889404297 }, { "id": "identity_135695", "image": "DVQA/images/bar_train_00100522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have an accuracy of 6 or higher for the \"dirt\" dataset?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.828125, "loss": 2.438833236694336 }, { "id": "identity_26661", "image": "TabMWP/images/32321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each pile has 2 bricks. How many bricks are in 3 piles?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.28125, "loss": 1.7821950912475586 }, { "id": "identity_204165", "image": "A-OKVQA/images/32060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What mode of transportation is being used by the individuals in the center of the image?" }, { "from": "gpt", "value": "The answer is A horse-drawn carriage" } ], "clip_score": 26.375, "loss": 2.4422738552093506 }, { "id": "identity_871", "image": "TabMWP/images/2439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Brenna need to buy a four-inch nail and a red thumbtack?" }, { "from": "gpt", "value": "The answer is 0.06" } ], "clip_score": 34.4375, "loss": 2.0905184745788574 }, { "id": "identity_212773", "image": "VQA2.0/images/562217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction do the arrows on the road signs indicate?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 16.578125, "loss": 3.69734263420105 }, { "id": "identity_135808", "image": "ChartQA/images/two_col_5918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage increase in GDP from 2010 to 2011?" }, { "from": "gpt", "value": "The answer is 5.7%" } ], "clip_score": 23.265625, "loss": 1.7082884311676025 }, { "id": "identity_40977", "image": "DocVQA/images/kgpx0078_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the projected attendance for the event?" }, { "from": "gpt", "value": "The answer is 80,000 (2 days)" } ], "clip_score": 16.921875, "loss": 2.436506509780884 }, { "id": "identity_310972", "image": "UniGeo/images/1872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是半径为5的圆的一条弦,且AB=8,若P是AB的中点,则OP的长度是()\n选项:\n(A) 2\n(B) 3\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 2.308281421661377 }, { "id": "identity_103477", "image": "A-OKVQA/images/180787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What condiment is placed next to the white bowl on the counter?" }, { "from": "gpt", "value": "The answer is Guacamole or a green sauce" } ], "clip_score": 16.125, "loss": 2.442521572113037 }, { "id": "identity_147556", "image": "DocVQA/images/ymfv0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the chairman of Session IV?" }, { "from": "gpt", "value": "The answer is Sir Hedley Atkins" } ], "clip_score": 24.796875, "loss": 2.8364737033843994 }, { "id": "identity_120805", "image": "VQA-AS/images/3897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the curtains in the room?" }, { "from": "gpt", "value": "The answer is Floral" } ], "clip_score": 20.203125, "loss": 2.810152053833008 }, { "id": "identity_338116", "image": "PMC-VQA/images/PMC6476123_fig2_469077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the injury detected by the initial scan?\nChoices:\n(A) Bilateral fracture of the lateral mass of the sacrum. \n(B) Displaced fracture of the lumbar vertebra. \n(C) Bi-lateral fracture of the coccyx. \n(D) Herniated disk on the cervical spine." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.828125, "loss": 3.6446213722229004 }, { "id": "identity_5728", "image": "VQA2.0/images/232689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the floor have a checkered pattern?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 14.859375, "loss": 3.7559924125671387 }, { "id": "identity_191259", "image": "VQA2.0/images/170485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it likely to be based on the image?" }, { "from": "gpt", "value": "The answer is Likely morning or late afternoon" } ], "clip_score": 13.03125, "loss": 2.728400468826294 }, { "id": "identity_218601", "image": "Super-CLEVR/images/superCLEVR_new_000709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.96875, "loss": 4.217673301696777 }, { "id": "identity_74863", "image": "ScienceQA/images/18647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the house made of where the three pigs end up living?" }, { "from": "gpt", "value": "The answer is Brick" } ], "clip_score": 35.3125, "loss": 2.4514200687408447 }, { "id": "identity_119478", "image": "VQA2.0/images/434270.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the building directly behind the elephant?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 28.5625, "loss": 4.415472507476807 }, { "id": "identity_91389", "image": "Geometry3K/images/1425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of z in the figure?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 27.03125, "loss": 2.536564826965332 }, { "id": "identity_129763", "image": "VQA-AS/images/13779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object the boy is sitting on?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.8125, "loss": 3.4353859424591064 }, { "id": "identity_84507", "image": "UniGeo/images/2349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AC is the diameter of the circle and AB = BD, find the radius of the circle" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.96875, "loss": 2.672846794128418 }, { "id": "identity_170568", "image": "IconQA/images/63021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of upside-down triangles to right-side-up triangles?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 26.015625, "loss": 1.5259315967559814 }, { "id": "identity_129337", "image": "PMC-VQA/images/PMC7178603_Fig2_157518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the chest X-ray?" }, { "from": "gpt", "value": "The answer is Anterior-posterior (AP) view" } ], "clip_score": 32.0, "loss": 1.962424874305725 }, { "id": "identity_5806", "image": "TQA/images/abc_question_images/human_system_digestive_11388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letters identifies the pancreas?\nChoices:\n(A) L\n(B) V\n(C) X\n(D) A" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.78125, "loss": 4.144286155700684 }, { "id": "identity_93337", "image": "CLEVR-Math/images/CLEVR_train_026335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cylinders?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.09375, "loss": 2.548671007156372 }, { "id": "identity_25749", "image": "DocVQA/images/txdw0217_43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the first column of the table?" }, { "from": "gpt", "value": "The answer is Treatment" } ], "clip_score": 20.015625, "loss": 3.1964008808135986 }, { "id": "identity_273921", "image": "TextVQA/images/a2a51e04db82a306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number displayed on the banner above the man wearing a black shirt?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 18.78125, "loss": 2.567354440689087 }, { "id": "identity_310710", "image": "CLEVR-Math/images/CLEVR_train_029032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all spherical objects. How many items remain?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.125, "loss": 1.9789212942123413 }, { "id": "identity_131378", "image": "PMC-VQA/images/PMC7532635_Fig1_265536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of ultrasound view is depicted in the image?" }, { "from": "gpt", "value": "The answer is Transverse or axial view" } ], "clip_score": 34.03125, "loss": 2.96836519241333 }, { "id": "identity_34199", "image": "VQA2.0/images/160597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there water in the picture?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 11.3515625, "loss": 3.479008436203003 }, { "id": "identity_307386", "image": "VQA2.0/images/138579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large crowd in the stands?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.296875, "loss": 2.9073259830474854 }, { "id": "identity_183697", "image": "PMC-VQA/images/PMC5664813_Fig5_233716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the object in the image?" }, { "from": "gpt", "value": "The answer is Vertical orientation" } ], "clip_score": 19.09375, "loss": 3.3757917881011963 }, { "id": "identity_268759", "image": "GeoQA+/images/236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, PA, PB, and AB are all tangent to circle O, with angle P equal to 60°. What is the measure of angle AOB? \nChoices:\n(A) 50°\n(B) 60°\n(C) 70°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.84375, "loss": 2.280128002166748 }, { "id": "identity_312145", "image": "UniGeo/images/3305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,CD是⊙O的直径,过点D的弦DE与半径OA平行,若∠D的度数是50°,则∠A的度数为多少?\n选项:\n(A) 50°\n(B) 40°\n(C) 30°\n(D) 25°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.46875, "loss": 2.546525001525879 }, { "id": "identity_2026", "image": "TQA/images/question_images/acid_rain_formation_6505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the picture below, which kills plant life, pollutes rivers and streams, and erodes stonework?\nChoices:\n(A) Oxygen\n(B) Rainfall\n(C) Wind\n(D) Acid Rain" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.0, "loss": 3.5142581462860107 }, { "id": "identity_19487", "image": "VQA2.0/images/544325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color shirt is the guy on the right wearing?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 18.09375, "loss": 4.6215057373046875 }, { "id": "identity_115240", "image": "Super-CLEVR/images/superCLEVR_new_001646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vehicle in the image that shares the same color as the train?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.90625, "loss": 3.1968042850494385 }, { "id": "identity_80104", "image": "UniGeo/images/127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle CDE in the given diagram?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 32.9375, "loss": 1.6660003662109375 }, { "id": "identity_331686", "image": "PMC-VQA/images/PMC9227253_medicina-58-00698-f003_322110.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaflet is involved in mitral valve prolapse?\nChoices:\n(A) Anterior leaflet\n(B) Posterior leaflet\n(C) Both leaflets\n(D) The mitral valve does not have leaflets." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.03125, "loss": 4.150794982910156 }, { "id": "identity_305149", "image": "TextVQA/images/03235d26a98bcb0f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this motorcycle?" }, { "from": "gpt", "value": "The answer is fc" } ], "clip_score": 26.265625, "loss": 4.695777416229248 }, { "id": "identity_220224", "image": "PMC-VQA/images/PMC4801364_pone.0151376.g002_13330.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level of this microscopic image?" }, { "from": "gpt", "value": "The answer is High magnification (400x to 1000x)" } ], "clip_score": 30.421875, "loss": 1.5744235515594482 }, { "id": "identity_191731", "image": "PMC-VQA/images/PMC3349020_Fig1_137069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we identify any surgical implants or foreign objects in this radiograph?" }, { "from": "gpt", "value": "The answer is No surgical implants or foreign objects" } ], "clip_score": 33.3125, "loss": 2.463101387023926 }, { "id": "identity_228963", "image": "ScienceQA/images/1420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the definitions provided, compare and contrast the distribution of power in a federal system with that in a unitary system and a confederate system. Which system would likely result in the most balanced distribution of power between national and state governments, and why?" }, { "from": "gpt", "value": "The answer is Federal system, because it balances power between national and state governments" } ], "clip_score": 33.0625, "loss": 2.4320929050445557 }, { "id": "identity_265065", "image": "VQA2.0/images/195368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the floor made of marble?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.046875, "loss": 3.836088180541992 }, { "id": "identity_226897", "image": "ChartQA/images/multi_col_100910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average WOZ value price index from 2008 to 2016, excluding the years where the WOZ value price index was 104?" }, { "from": "gpt", "value": "The answer is 95.43" } ], "clip_score": 30.703125, "loss": 1.9797662496566772 }, { "id": "identity_106448", "image": "VQA-AS/images/16483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fireplace?" }, { "from": "gpt", "value": "The answer is The fireplace is white" } ], "clip_score": 28.015625, "loss": 3.080700635910034 }, { "id": "identity_293536", "image": "A-OKVQA/images/180488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What natural force will bring the snowboarder back to the ground after going over the jump?\nChoices:\n(A) gravity\n(B) inertia\n(C) fission\n(D) kinetic energy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.171875, "loss": 4.560173988342285 }, { "id": "identity_74223", "image": "TQA/images/question_images/glaciers_6931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the abbreviation 'CDW' stand for in the context of this image?" }, { "from": "gpt", "value": "The answer is Circumpolar Deep Water" } ], "clip_score": 23.484375, "loss": 2.014115810394287 }, { "id": "identity_17154", "image": "DocVQA/images/ftvn0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Subject?" }, { "from": "gpt", "value": "The answer is Voucher Approval" } ], "clip_score": 20.703125, "loss": 3.075169324874878 }, { "id": "identity_153093", "image": "ScienceQA/images/13735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the worker likely measuring or testing for in the water?" }, { "from": "gpt", "value": "The answer is Water quality parameters" } ], "clip_score": 29.453125, "loss": 3.9615070819854736 }, { "id": "identity_90637", "image": "Geometry3K/images/1943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If x = 7√3, find the length of side a" }, { "from": "gpt", "value": "The answer is 10.5" } ], "clip_score": 31.5625, "loss": 2.0518596172332764 }, { "id": "identity_244008", "image": "A-OKVQA/images/197690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the switch-hitting Atlanta Braves legend at the plate and provide the year this player was inducted into the Baseball Hall of Fame" }, { "from": "gpt", "value": "The answer is 2018" } ], "clip_score": 25.296875, "loss": 2.4724550247192383 }, { "id": "identity_111164", "image": "VQA2.0/images/446794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the foliage in the image?" }, { "from": "gpt", "value": "The answer is Sparse and autumnal" } ], "clip_score": 18.703125, "loss": 3.714507579803467 }, { "id": "identity_137694", "image": "MapQA/images/map_4270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with no available data on the percentage of senators?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 35.53125, "loss": 2.765965700149536 }, { "id": "identity_214216", "image": "PMC-VQA/images/PMC2397490_F9_23367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of cellular division in this microscopic image?" }, { "from": "gpt", "value": "The answer is No signs of cellular division visible" } ], "clip_score": 31.046875, "loss": 3.1000983715057373 }, { "id": "identity_278856", "image": "GeoQA+/images/5361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the circle O, AB is a diameter, and C, D are two points on the circle O with OC parallel to AD. If ∠DAC = 25°, what is the degree measure of ∠BOC?\nChoices:\n(A) 40°\n(B) 50°\n(C) 60°\n(D) 65°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.1875, "loss": 2.5157296657562256 }, { "id": "identity_311118", "image": "FigureQA/images/8161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Sky Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.9296875, "loss": 3.682150363922119 }, { "id": "identity_241241", "image": "Super-CLEVR/images/superCLEVR_new_000551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects that are either rubber objects on the right side of the rubber biplane or objects that are on the left side of the small bicycle, and how many of these objects are not rubber?" }, { "from": "gpt", "value": "The answer is 3 objects in total, 1 of which is not rubber" } ], "clip_score": 28.65625, "loss": 1.6236307621002197 }, { "id": "identity_55814", "image": "DocVQA/images/sllk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What institution is associated with the author of the research?" }, { "from": "gpt", "value": "The answer is Harvard School of Dental Medicine" } ], "clip_score": 20.8125, "loss": 1.7176223993301392 }, { "id": "identity_261082", "image": "IconQA/images/36592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the positions of all the planes in the sequence?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 16.1875, "loss": 2.067511796951294 }, { "id": "identity_116329", "image": "VQA2.0/images/477259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the Frisbee in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.375, "loss": 2.9906058311462402 }, { "id": "identity_25502", "image": "A-OKVQA/images/54039.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are the vehicles covered?\nChoices:\n(A) to decorate\n(B) to steal\n(C) protection\n(D) to ship" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.796875, "loss": 4.186976432800293 }, { "id": "identity_197089", "image": "TextVQA/images/5693c81c6f066fca.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the poster on the table?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 13.8203125, "loss": 4.5850934982299805 }, { "id": "identity_186791", "image": "PMC-VQA/images/PMC4143537_Fig2_315466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of pleural effusion in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.171875, "loss": 2.812182664871216 }, { "id": "identity_225865", "image": "A-OKVQA/images/367896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the main components of this meal and explain how each contributes to making it a balanced meal" }, { "from": "gpt", "value": "The answer is Grilled chicken, roasted potatoes, and steamed broccoli contribute to balance" } ], "clip_score": 31.875, "loss": 2.537564754486084 }, { "id": "identity_34651", "image": "IconQA/images/34382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dots are there?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 31.546875, "loss": 2.072720766067505 }, { "id": "identity_108429", "image": "PMC-VQA/images/PMC6410651_jclintranslres-2-091-g002_448354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any surgical hardware or implants in this radiograph?" }, { "from": "gpt", "value": "The answer is No surgical hardware or implants visible" } ], "clip_score": 30.890625, "loss": 2.3614227771759033 }, { "id": "identity_294757", "image": "A-OKVQA/images/349193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of net is depicted in the image? \nOptions:\n(A) tennis\n(B) beach volleyball\n(C) fishing\n(D) butterfly" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.84375, "loss": 4.977574348449707 }, { "id": "identity_213971", "image": "TextVQA/images/73dd9803b9f1b96b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full title of the book as seen in the image?" }, { "from": "gpt", "value": "The answer is The Triumph & Tragedy of Lyndon Johnson" } ], "clip_score": 35.6875, "loss": 1.6746091842651367 }, { "id": "identity_8707", "image": "TabMWP/images/21537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Mrs. Shen told students how many rounds to expect in the spelling bee based on previous years. According to the table, what was the rate of change between 2014 and 2015?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 39.96875, "loss": 3.133890390396118 }, { "id": "identity_313656", "image": "TextVQA/images/2b8800b25c6427f9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand on this monitor?" }, { "from": "gpt", "value": "The answer is samsung" } ], "clip_score": 28.15625, "loss": 2.942173957824707 }, { "id": "identity_157340", "image": "TQA/images/question_images/blastocyst_9034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the collection of cells inside the blastocyst?" }, { "from": "gpt", "value": "The answer is inner cell mass" } ], "clip_score": 36.78125, "loss": 2.977518081665039 }, { "id": "identity_120303", "image": "Super-CLEVR/images/superCLEVR_new_000595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any objects in the image that share the same color as the smallest bicycle?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.875, "loss": 3.1396889686584473 }, { "id": "identity_162603", "image": "GeoQA+/images/5700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the rectangle ABCD?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 31.421875, "loss": 2.5977649688720703 }, { "id": "identity_138218", "image": "MapQA/images/map_1047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range for the state of California?" }, { "from": "gpt", "value": "The answer is a percentage range" } ], "clip_score": 31.546875, "loss": 3.272367238998413 }, { "id": "identity_278093", "image": "Geometry3K/images/1207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the area of the blue sector.\nChoices:\n(A) 2.3 \\pi\n(B) 4.6 \\pi\n(C) 6.9 \\pi\n(D) 9.2 \\pi" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.0625, "loss": 2.7619311809539795 }, { "id": "identity_40129", "image": "DVQA/images/bar_train_00132502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy on the 'nature' dataset?" }, { "from": "gpt", "value": "The answer is roof" } ], "clip_score": 33.3125, "loss": 3.7015557289123535 }, { "id": "identity_73015", "image": "ScienceQA/images/15988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which direction is the Grevy's zebra facing in the image?" }, { "from": "gpt", "value": "The answer is To the left" } ], "clip_score": 34.6875, "loss": 3.3753392696380615 }, { "id": "identity_79530", "image": "UniGeo/images/2002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment AB when the seat C is at a height of 0.9m?" }, { "from": "gpt", "value": "The answer is 0.136m" } ], "clip_score": 30.09375, "loss": 2.0965933799743652 }, { "id": "identity_29826", "image": "IconQA/images/12880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many hearts are there?\nChoices:\n(A) 9\n(B) 6\n(C) 1\n(D) 8\n(E) 10" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 25.765625, "loss": 3.0764834880828857 }, { "id": "identity_108872", "image": "A-OKVQA/images/333665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 19.453125, "loss": 2.2771260738372803 }, { "id": "identity_107104", "image": "A-OKVQA/images/507854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Sunny" } ], "clip_score": 13.046875, "loss": 3.0963141918182373 }, { "id": "identity_58691", "image": "DVQA/images/bar_train_00156900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 5 more units of 'rhythm' were sold, would it surpass the units sold of 'studio'?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.734375, "loss": 3.042790651321411 }, { "id": "identity_36520", "image": "PMC-VQA/images/PMC4139810_ijms-15-11730-f005_314458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the photograph? \nChoices:\n(A) human eye \n(B) micro-tubes \n(C) chymotrypsin \n(D) SEM machine " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 4.734679222106934 }, { "id": "identity_97887", "image": "CLEVR-Math/images/CLEVR_train_048783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are yellow?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.296875, "loss": 2.4790074825286865 }, { "id": "identity_215766", "image": "A-OKVQA/images/534220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any water bodies visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, a sea or ocean" } ], "clip_score": 15.046875, "loss": 3.5984349250793457 }, { "id": "identity_271541", "image": "DVQA/images/bar_train_00127981.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is symbolized by the color green?" }, { "from": "gpt", "value": "The answer is worry" } ], "clip_score": 20.96875, "loss": 3.7398345470428467 }, { "id": "identity_224789", "image": "CLEVR-Math/images/CLEVR_train_018584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of shapes minus the number of brown shapes?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 18.03125, "loss": 2.1694111824035645 }, { "id": "identity_333419", "image": "A-OKVQA/images/10484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is situated above the iron structure on top of the departing train?\nChoices:\n(A) coffee shop\n(B) factory\n(C) storefront\n(D) train track" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.9375, "loss": 4.274601936340332 }, { "id": "identity_150960", "image": "ScienceQA/images/2521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the spoon have any visible scratches or damages?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.59375, "loss": 2.8343844413757324 }, { "id": "identity_81851", "image": "UniGeo/images/1057.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABDE是圆O的内接四边形,若∠A=53°,∠F=27°,则∠AED的度数为()" }, { "from": "gpt", "value": "The answer is 53°" } ], "clip_score": 32.125, "loss": 2.112793445587158 }, { "id": "identity_273105", "image": "TextVQA/images/7608bdc4a73f4ca8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the central idea or concept of the machine?" }, { "from": "gpt", "value": "The answer is evil kinevel" } ], "clip_score": 19.40625, "loss": 5.302825450897217 }, { "id": "identity_105506", "image": "VizWiz/images/VizWiz_train_00003673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many seats are visible in the image?" }, { "from": "gpt", "value": "The answer is Three full seats are visible" } ], "clip_score": 22.46875, "loss": 4.590207099914551 }, { "id": "identity_96712", "image": "TabMWP/images/34933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long does the theater rehearsal last?" }, { "from": "gpt", "value": "The answer is 1 hour and 35 minutes" } ], "clip_score": 20.015625, "loss": 1.7271608114242554 }, { "id": "identity_301270", "image": "A-OKVQA/images/234929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was used to construct this? \nChoices:\n(A) metal\n(B) paper\n(C) plastic\n(D) legos" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.921875, "loss": 3.8959102630615234 }, { "id": "identity_117524", "image": "VizWiz/images/VizWiz_train_00016351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual flowers can be counted in the image?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 19.890625, "loss": 2.0469253063201904 }, { "id": "identity_257406", "image": "IconQA/images/78593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the positions of all the dog images in the sequence?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 23.65625, "loss": 2.0056068897247314 }, { "id": "identity_213079", "image": "A-OKVQA/images/206975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vegetation in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.4296875, "loss": 3.111358165740967 }, { "id": "identity_199311", "image": "PMC-VQA/images/PMC7592484_f2_284001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the main airways clear in this image?" }, { "from": "gpt", "value": "The answer is Yes, the main airways are clear" } ], "clip_score": 23.6875, "loss": 1.9371131658554077 }, { "id": "identity_247228", "image": "ScienceQA/images/9170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a fruit fly produced by this cross will have red eyes if the red eye trait is dominant and the white eye trait is recessive, and how many of the offspring will have red eyes if 100 fruit flies are produced?" }, { "from": "gpt", "value": "The answer is 100 fruit flies" } ], "clip_score": 25.4375, "loss": 2.188864231109619 }, { "id": "identity_291296", "image": "A-OKVQA/images/222322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where have all the chairs been arranged? \nOptions:\n(A) on the beach\n(B) in the park\n(C) near the lake\n(D) in the backyard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.375, "loss": 4.056424617767334 }, { "id": "identity_100333", "image": "TabMWP/images/9804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of passengers with silver status?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 29.125, "loss": 2.5737524032592773 }, { "id": "identity_45855", "image": "FigureQA/images/80452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest y-axis value for Sky Blue?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.515625, "loss": 2.537019729614258 }, { "id": "identity_52386", "image": "FigureQA/images/37277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Medium Seafoam section in the pie chart?" }, { "from": "gpt", "value": "The answer is 33.33%" } ], "clip_score": 35.03125, "loss": 1.2683478593826294 }, { "id": "identity_3100", "image": "Super-CLEVR/images/superCLEVR_new_000479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number of brown objects are either tiny rubber wagons or motorbikes?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.09375, "loss": 2.3870179653167725 }, { "id": "identity_281213", "image": "ScienceQA/images/3939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the decomposer in this food web among the following organisms:\n(A) black crappie\n(B) bacteria\n(C) rotifer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.75, "loss": 3.8668015003204346 }, { "id": "identity_313088", "image": "DVQA/images/bar_train_00165210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars each a single solid color without any patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.453125, "loss": 3.438169002532959 }, { "id": "identity_154946", "image": "ScienceQA/images/12469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average final temperature of both cakes?" }, { "from": "gpt", "value": "The answer is 78.5°C" } ], "clip_score": 32.15625, "loss": 1.4572197198867798 }, { "id": "identity_229112", "image": "UniGeo/images/455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知⊙O的直径AE=10cm,∠B=∠EAC,若BC=6cm,则三角形ABC的面积是多少?" }, { "from": "gpt", "value": "The answer is 3√14 cm²" } ], "clip_score": 29.625, "loss": 2.3196094036102295 }, { "id": "identity_48769", "image": "PlotQA/images/53976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of trade with economies of South Asia for Chile in merchandise exports?" }, { "from": "gpt", "value": "The answer is 4%" } ], "clip_score": 32.6875, "loss": 2.190570831298828 }, { "id": "identity_133280", "image": "VQA2.0/images/310300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of vegetables are visible on the cutting board?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.203125, "loss": 4.410166263580322 }, { "id": "identity_167148", "image": "IconQA/images/86897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line were to represent a road, how many centimeters would a car have to travel to reach the halfway point?" }, { "from": "gpt", "value": "The answer is 2.75 centimeters" } ], "clip_score": 27.734375, "loss": 1.7158833742141724 }, { "id": "identity_22179", "image": "VQA2.0/images/169322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the glass of milk empty?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 13.8046875, "loss": 3.7855064868927 }, { "id": "identity_267431", "image": "CLEVR-Math/images/CLEVR_train_045982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 green cubes, what is the total number of green cubes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.84375, "loss": 1.877023696899414 }, { "id": "identity_335917", "image": "TabMWP/images/3429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Dillon has $282. Can he afford to buy a hedgehog and a rabbit?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.5625, "loss": 3.2946770191192627 }, { "id": "identity_220545", "image": "VQA2.0/images/208882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the scarf in the image?" }, { "from": "gpt", "value": "The answer is Solid turquoise color" } ], "clip_score": 13.9140625, "loss": 2.796516180038452 }, { "id": "identity_311265", "image": "CLEVR-Math/images/CLEVR_train_013318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all small purple items. Remove all tiny purple metallic spheres. How many items remain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.640625, "loss": 2.21379017829895 }, { "id": "identity_263373", "image": "GeoQA+/images/4350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In right triangle ABC, where angle C is 90 degrees, AC is 8, BC is 6, P is a moving point on side AC, and a parallelogram APBQ is constructed with PB and PA as sides. What is the minimum value of the diagonal PQ?\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 10" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.40625, "loss": 2.6645164489746094 }, { "id": "identity_169479", "image": "IconQA/images/40631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one plate is removed, what fraction of the utensils will be forks?" }, { "from": "gpt", "value": "The answer is 1/3" } ], "clip_score": 35.09375, "loss": 1.6011462211608887 }, { "id": "identity_74711", "image": "ScienceQA/images/19154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the knife in the image appear to be serrated or straight-edged?" }, { "from": "gpt", "value": "The answer is Straight-edged" } ], "clip_score": 24.8125, "loss": 2.147061824798584 }, { "id": "identity_110706", "image": "PMC-VQA/images/PMC3228743_F2_117897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of echocardiogram view is shown in the image?" }, { "from": "gpt", "value": "The answer is Apical four-chamber view" } ], "clip_score": 34.21875, "loss": 2.1278645992279053 }, { "id": "identity_55233", "image": "FigureQA/images/1916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart represent equal proportions for Light Seafoam and Medium Seafoam?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 39.84375, "loss": 3.2864251136779785 }, { "id": "identity_63009", "image": "ScienceQA/images/2140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representation of the North pole in the magnets?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 30.625, "loss": 3.6498970985412598 }, { "id": "identity_176772", "image": "TabMWP/images/37436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of matches won by players last summer?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 36.59375, "loss": 2.623492479324341 }, { "id": "identity_17522", "image": "TabMWP/images/25991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: An arcade in Lowell keeps track of which prizes its customers get with their tickets. What fraction of the prizes were yo-yos? Simplify your answer." }, { "from": "gpt", "value": "The answer is 13/58" } ], "clip_score": 29.78125, "loss": 2.3492178916931152 }, { "id": "identity_133552", "image": "Super-CLEVR/images/superCLEVR_new_000377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object located in the center of the image?" }, { "from": "gpt", "value": "The answer is Oval" } ], "clip_score": 18.609375, "loss": 3.1795597076416016 }, { "id": "identity_172256", "image": "TabMWP/images/13043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which performance begins immediately after the acrobats?" }, { "from": "gpt", "value": "The answer is The magician" } ], "clip_score": 29.421875, "loss": 3.267259359359741 }, { "id": "identity_149027", "image": "DocVQA/images/yscw0217_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date assigned for the project?" }, { "from": "gpt", "value": "The answer is June 26, 2001" } ], "clip_score": 23.34375, "loss": 1.453468918800354 }, { "id": "identity_194713", "image": "PMC-VQA/images/PMC4805545_fig4s1_14144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are labeled in the upper part of the image?" }, { "from": "gpt", "value": "The answer is Neuroblasts and GMCs (Ganglion Mother Cells)" } ], "clip_score": 33.5, "loss": 1.4245431423187256 }, { "id": "identity_265612", "image": "FigureQA/images/89105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Seafoam the curve with the largest area underneath it?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.625, "loss": 4.256603240966797 }, { "id": "identity_234347", "image": "ChartQA/images/two_col_3941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average annual foreign exchange earnings from tourism in India from 2010 to 2020 (in billion U.S. dollars)?" }, { "from": "gpt", "value": "The answer is 21.4 billion U.S. dollars" } ], "clip_score": 23.359375, "loss": 1.379704236984253 }, { "id": "identity_124847", "image": "TextVQA/images/9b946bd719f7c652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What channel number is associated with the Bright House Sports Network as seen on the signage?" }, { "from": "gpt", "value": "The answer is Channel 47" } ], "clip_score": 16.0, "loss": 2.226874351501465 }, { "id": "identity_259429", "image": "PMC-VQA/images/PMC8419393_rcr2840-fig-0002_52224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the extent of ground-glass opacity in the right lung parenchyma with the left lung parenchyma and determine which lung has a higher percentage of affected area" }, { "from": "gpt", "value": "The answer is Right lung parenchyma has a higher percentage of affected area" } ], "clip_score": 32.9375, "loss": 1.4134780168533325 }, { "id": "identity_246633", "image": "VizWiz/images/VizWiz_train_00002266.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key features of this cuckoo clock that distinguish it from other types of clocks?" }, { "from": "gpt", "value": "The answer is House shape, carved leaves, Roman numerals, pinecone weights" } ], "clip_score": 34.15625, "loss": 3.444096326828003 }, { "id": "identity_186957", "image": "TextVQA/images/edf3913fe963b3ef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the taxi in the foreground?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 25.765625, "loss": 3.5837960243225098 }, { "id": "identity_12965", "image": "UniGeo/images/2314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,E是AC上一点,EF⊥AB于点F,且\\frac{AE}{EF}=\\frac{3}{4},BC=10,则BC的弦心距OD等于()\nChoices:\n(A) \\frac{9}{4}\n(B) \\frac{15}{4}\n(C) 4\n(D) \\frac{12}{5}" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.53125, "loss": 2.3035330772399902 }, { "id": "identity_172200", "image": "IconQA/images/19310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each triangle represents a tree, how many trees are there in the forest?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 33.84375, "loss": 2.4160947799682617 }, { "id": "identity_305826", "image": "DVQA/images/bar_train_00077293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the least number of units in total across all the stores?" }, { "from": "gpt", "value": "The answer is talk" } ], "clip_score": 28.125, "loss": 3.4847404956817627 }, { "id": "identity_25079", "image": "ScienceQA/images/2453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Vermont\n(B) Ohio\n(C) North Carolina\n(D) Massachusetts" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.484375, "loss": 3.9901726245880127 }, { "id": "identity_135326", "image": "DocVQA/images/rzld0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the invoice?" }, { "from": "gpt", "value": "The answer is November 7, 1996" } ], "clip_score": 28.53125, "loss": 1.6912633180618286 }, { "id": "identity_91237", "image": "Geometry3K/images/140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the acute angle formed by line t and the horizontal line b?" }, { "from": "gpt", "value": "The answer is 23°" } ], "clip_score": 34.125, "loss": 1.9270868301391602 }, { "id": "identity_62954", "image": "ScienceQA/images/7382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the magnets are brought closer, what will happen to the force between them?" }, { "from": "gpt", "value": "The answer is The force of attraction will increase" } ], "clip_score": 20.625, "loss": 2.9146485328674316 }, { "id": "identity_186007", "image": "VQA2.0/images/426500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trees are lined up along the street?" }, { "from": "gpt", "value": "The answer is Palm trees" } ], "clip_score": 23.296875, "loss": 2.532038927078247 }, { "id": "identity_166101", "image": "IconQA/images/70919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If it takes Ella 15 minutes to drive to work, what time will she arrive at work if she leaves now?" }, { "from": "gpt", "value": "The answer is 6:15" } ], "clip_score": 24.953125, "loss": 1.5301178693771362 }, { "id": "identity_56169", "image": "DVQA/images/bar_train_00012223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference percentage between 'lead' and 'flash' for the object 'pool'?" }, { "from": "gpt", "value": "The answer is Approximately 20%" } ], "clip_score": 29.875, "loss": 2.4150590896606445 }, { "id": "identity_18434", "image": "A-OKVQA/images/538809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this store sell?\nChoices:\n(A) running clothes\n(B) bikes\n(C) doughnuts\n(D) running shoes" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.203125, "loss": 4.137902736663818 }, { "id": "identity_214865", "image": "TextVQA/images/a6f1345554d806b7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the street number of the establishment in the image?" }, { "from": "gpt", "value": "The answer is 1204" } ], "clip_score": 14.4609375, "loss": 2.39396333694458 }, { "id": "identity_333143", "image": "PMC-VQA/images/PMC5922503_FIG3_306980.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the pleural effusion located in the chest?\nChoices:\n(A) Right side.\n(B) Front side.\n(C) Left side.\n(D) Back side." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.515625, "loss": 4.334421634674072 }, { "id": "identity_161230", "image": "Geometry3K/images/115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If JK = 6y - 2 and GH = 5y + 1, find the value of y so that JK is parallel to GH in the quadrilateral" }, { "from": "gpt", "value": "The answer is y = 3" } ], "clip_score": 35.0, "loss": 1.8456554412841797 }, { "id": "identity_182576", "image": "PMC-VQA/images/PMC7856147_F7_358596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any broken or incomplete diatoms visible in the SEM image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.96875, "loss": 3.202070713043213 }, { "id": "identity_302799", "image": "GeoQA+/images/419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,五边形ABCDE内接于⊙O,若∠CAD=40°,则∠B+∠E的度数是多少?\nChoices:\n(A) 200°\n(B) 215°\n(C) 230°\n(D) 220°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.515625, "loss": 2.190239429473877 }, { "id": "identity_332290", "image": "TabMWP/images/6454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. When does the badminton event start?\nChoices:\n(A) 3:55 P.M.\n(B) 2:55 P.M.\n(C) 3:00 P.M.\n(D) 1:20 P.M." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.375, "loss": 3.707221269607544 }, { "id": "identity_199000", "image": "PMC-VQA/images/PMC4466355_fig1_396404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of lymphadenopathy in the mediastinum?" }, { "from": "gpt", "value": "The answer is No evidence of mediastinal lymphadenopathy is visible" } ], "clip_score": 26.609375, "loss": 2.040323257446289 }, { "id": "identity_141571", "image": "DVQA/images/bar_train_00094454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the 'film' algorithm more accurate than the 'world' algorithm?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 36.5625, "loss": 2.980198860168457 }, { "id": "identity_325615", "image": "Super-CLEVR/images/superCLEVR_new_000508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the red object in front of the small red rubber item behind the rubber vehicle made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 25.0, "loss": 3.2057714462280273 }, { "id": "identity_304259", "image": "CLEVR-Math/images/CLEVR_train_062320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add some small red objects. How many small red objects are left?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 24.734375, "loss": 2.6591455936431885 }, { "id": "identity_329525", "image": "ChartQA/images/3770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What % of people think Germany is playing a more important role in world affairs now compared to 10 years ago?" }, { "from": "gpt", "value": "The answer is 47" } ], "clip_score": 36.40625, "loss": 2.771066427230835 }, { "id": "identity_302111", "image": "PMC-VQA/images/PMC7981177_fig1_396137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What position is depicted in the image?\nChoices:\n(A) Extension position\n(B) Flexion position\n(C) Supine position\n(D) Prone position" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.609375, "loss": 4.937926292419434 }, { "id": "identity_42199", "image": "FigureQA/images/27463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections are there in the pie chart?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.640625, "loss": 2.1764278411865234 }, { "id": "identity_276148", "image": "TextVQA/images/8d6ae29de134eeef.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the brand of the typewriter?" }, { "from": "gpt", "value": "The answer is hermis" } ], "clip_score": 36.03125, "loss": 4.844217777252197 }, { "id": "identity_221291", "image": "TextVQA/images/b96afd4dee26c117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the door?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 20.15625, "loss": 3.725426435470581 }, { "id": "identity_311183", "image": "FigureQA/images/11250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson larger than Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.40625, "loss": 4.007101535797119 }, { "id": "identity_314957", "image": "ScienceQA/images/5412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Cuba\n(B) Saint Vincent and the Grenadines\n(C) Grenada\n(D) Trinidad and Tobago" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.421875, "loss": 4.172192096710205 }, { "id": "identity_180188", "image": "IconQA/images/20301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the total number of sides are on the first two shapes from the left?" }, { "from": "gpt", "value": "The answer is 1/4" } ], "clip_score": 23.5, "loss": 1.511430263519287 }, { "id": "identity_283296", "image": "A-OKVQA/images/487020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the birds that are seen outside of their cage here sold?\nChoices:\n(A) dinner\n(B) hat feathers\n(C) stealing jewels\n(D) pets" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.484375, "loss": 4.188265323638916 }, { "id": "identity_86038", "image": "GeoQA+/images/3893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠OBC=50° and OC is a radius, what is the measure of ∠OCB?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 29.75, "loss": 1.7574994564056396 }, { "id": "identity_121036", "image": "TextVQA/images/350cfcd3adaa9891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the streetcar in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 28.09375, "loss": 2.5582797527313232 }, { "id": "identity_299341", "image": "DocVQA/images/qgcm0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is specified in the document?" }, { "from": "gpt", "value": "The answer is June 4, 1973" } ], "clip_score": 18.15625, "loss": 1.9165573120117188 }, { "id": "identity_183016", "image": "PMC-VQA/images/PMC6022295_Fig2_334194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of bowel obstruction in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.78125, "loss": 3.0607552528381348 }, { "id": "identity_263944", "image": "VQA-AS/images/9878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what way is the family maintaining warmth?" }, { "from": "gpt", "value": "The answer is fire" } ], "clip_score": 26.203125, "loss": 4.1603593826293945 }, { "id": "identity_235480", "image": "PlotQA/images/100226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of days required to register property from 2011 to 2014, and how does it compare to the average number of days required from 2009 to 2010?" }, { "from": "gpt", "value": "The answer is 9 days" } ], "clip_score": 26.546875, "loss": 3.2233662605285645 }, { "id": "identity_298436", "image": "PMC-VQA/images/PMC6425693_F6_454105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the 3D reconstructions in the snapshots display?\nChoices:\n(A) ASC specks within RGCs \n(B) ASC specks within microglia \n(C) ASC specks colocalizing with astroglia \n(D) ASC specks outside of the retina" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.75, "loss": 4.530329704284668 }, { "id": "identity_182707", "image": "PMC-VQA/images/PMC4794473_Fig3_12315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image symmetrical or asymmetrical?" }, { "from": "gpt", "value": "The answer is Asymmetrical" } ], "clip_score": 16.375, "loss": 2.0662143230438232 }, { "id": "identity_75779", "image": "AI2D/images/2137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final stage of a star the same size as Sol?" }, { "from": "gpt", "value": "The answer is White Dwarf" } ], "clip_score": 29.6875, "loss": 2.255227565765381 }, { "id": "identity_88861", "image": "Geometry3K/images/1648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the angles labeled 2x + 9 and 4x - 5?" }, { "from": "gpt", "value": "The answer is 180" } ], "clip_score": 33.09375, "loss": 2.117551803588867 }, { "id": "identity_317403", "image": "DVQA/images/bar_train_00040023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart shown in a logarithmic scale?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.15625, "loss": 3.629549503326416 }, { "id": "identity_183806", "image": "A-OKVQA/images/518816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is grazing on the grass?" }, { "from": "gpt", "value": "The answer is Sheep" } ], "clip_score": 24.15625, "loss": 2.753384590148926 }, { "id": "identity_289541", "image": "A-OKVQA/images/550666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of products does Hawes Garage repair?\nChoices:\n(A) software\n(B) mobiles\n(C) appliances\n(D) automobiles" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.2265625, "loss": 3.9321084022521973 }, { "id": "identity_148814", "image": "MapQA/images/map_4433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Texas?" }, { "from": "gpt", "value": "The answer is 457001-569096" } ], "clip_score": 26.390625, "loss": 2.3363122940063477 }, { "id": "identity_122758", "image": "TextVQA/images/e9e8663edee85683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the DJs performing at the event?" }, { "from": "gpt", "value": "The answer is DJ ErrE, Alex ErrE, Fabry ErrE" } ], "clip_score": 16.109375, "loss": 3.016846179962158 }, { "id": "identity_23890", "image": "A-OKVQA/images/433662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What item here can have multiple meanings?\nChoices:\n(A) trunk\n(B) cow\n(C) strawberry\n(D) reed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.96875, "loss": 3.6900551319122314 }, { "id": "identity_268961", "image": "TQA/images/question_images/seasons_1033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After the autumnal equinox, what is the seasonal arrangement that follows?\nChoices:\n(A) summer solstice\n(B) sun\n(C) winter solstice\n(D) vernal eqionox" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.21875, "loss": 3.63153076171875 }, { "id": "identity_182203", "image": "PMC-VQA/images/PMC7162474_pone.0231523.g002_152962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the depth setting for this ultrasound image?" }, { "from": "gpt", "value": "The answer is 2.50 cm" } ], "clip_score": 23.953125, "loss": 2.452457904815674 }, { "id": "identity_53828", "image": "PlotQA/images/76049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage of part-time workers between 1996 and 1999?" }, { "from": "gpt", "value": "The answer is Approximately 0.5%" } ], "clip_score": 30.6875, "loss": 2.2278339862823486 }, { "id": "identity_212677", "image": "PMC-VQA/images/PMC5058823_F2_82761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to obtain this image of the lungs?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 36.21875, "loss": 2.6525065898895264 }, { "id": "identity_163936", "image": "GeoQA+/images/3004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, if angle ABD is 20 degrees, what is the length of diagonal BD if AB is 10 units?" }, { "from": "gpt", "value": "The answer is approximately 10.17 units" } ], "clip_score": 30.828125, "loss": 1.8242541551589966 }, { "id": "identity_189162", "image": "VQA2.0/images/308436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation is visible around the giraffe?" }, { "from": "gpt", "value": "The answer is Grass and palm trees" } ], "clip_score": 30.8125, "loss": 2.2259812355041504 }, { "id": "identity_54011", "image": "MapQA/images/map_4994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for the state of Texas?" }, { "from": "gpt", "value": "The answer is 745-800" } ], "clip_score": 29.171875, "loss": 2.238530397415161 }, { "id": "identity_175534", "image": "IconQA/images/54855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of blue sections to total sections on the spinner?" }, { "from": "gpt", "value": "The answer is 1:2" } ], "clip_score": 30.921875, "loss": 1.4293324947357178 }, { "id": "identity_120436", "image": "PMC-VQA/images/PMC7016482_fig3_116074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical landmarks are used for cephalometric analysis in this image?" }, { "from": "gpt", "value": "The answer is Nasion, Orbitale, Porion, A point, B point, Pogonion, Menton" } ], "clip_score": 30.4375, "loss": 1.6479932069778442 }, { "id": "identity_256134", "image": "UniGeo/images/1727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在Rt△ABC中,∠BAC=90°. 将Rt△ABC绕点C按逆时针方向旋转48°得到Rt△A′B′C,点A在边B′C上,求∠A′CB′的大小。" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 27.34375, "loss": 1.8766371011734009 }, { "id": "identity_171125", "image": "TabMWP/images/29200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many activities have more parents participating than children?" }, { "from": "gpt", "value": "The answer is 1 activity (Singing)" } ], "clip_score": 30.984375, "loss": 2.6633999347686768 }, { "id": "identity_238465", "image": "Geometry3K/images/1531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of side YX and the area of triangle ZYX. Round to the nearest tenth" }, { "from": "gpt", "value": "The answer is 182.1" } ], "clip_score": 32.15625, "loss": 2.8897323608398438 }, { "id": "identity_291672", "image": "AI2D/images/699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the given objects, which one contains various metallic oxides like alumina or zirconia that were previously considered elements that are hard to reduce? \n(A) Moon\n(B) Stars\n(C) Earth\n(D) Sun" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.34375, "loss": 4.070249557495117 }, { "id": "identity_154179", "image": "AI2D/images/3664.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bundle is located on the left side of the heart?" }, { "from": "gpt", "value": "The answer is Left posterior bundle" } ], "clip_score": 34.8125, "loss": 3.5009732246398926 }, { "id": "identity_16147", "image": "IconQA/images/78663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are on the frame?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 33.0625, "loss": 2.1465439796447754 }, { "id": "identity_47154", "image": "DocVQA/images/krlf0023_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the effective date of the agreement?" }, { "from": "gpt", "value": "The answer is 07/01/1998" } ], "clip_score": 19.015625, "loss": 1.4626585245132446 }, { "id": "identity_135649", "image": "DVQA/images/bar_train_00077007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does 'river' have a greater 'medium' value than 'butter'?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.953125, "loss": 3.1326329708099365 }, { "id": "identity_194731", "image": "PMC-VQA/images/PMC7972202_Fig1_394605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of hemorrhage in the brain on this CT scan?" }, { "from": "gpt", "value": "The answer is No evidence of hemorrhage" } ], "clip_score": 33.96875, "loss": 2.5166051387786865 }, { "id": "identity_93098", "image": "TabMWP/images/33726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cost per pound if you buy 3 pounds of holiday confetti and 2 pounds of heart confetti?" }, { "from": "gpt", "value": "The answer is $10/pound" } ], "clip_score": 29.9375, "loss": 1.789696455001831 }, { "id": "identity_89574", "image": "GeoQA+/images/4090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If BC is the diameter of circle O and AD is perpendicular to BC, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.28125, "loss": 1.5035064220428467 }, { "id": "identity_97477", "image": "TabMWP/images/6844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of votes between BBQ and sour cream and onion flavors?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 32.1875, "loss": 2.7846057415008545 }, { "id": "identity_38482", "image": "A-OKVQA/images/107225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who owns the billboard illuminated in the most golden lighting above the NY Police dept?\nChoices:\n(A) izod\n(B) duane reade\n(C) yahoo\n(D) police" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.21875, "loss": 4.047518730163574 }, { "id": "identity_27866", "image": "A-OKVQA/images/44483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do people under the umbrellas here do?\nChoices:\n(A) knit\n(B) cheer\n(C) dine\n(D) sleep" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.015625, "loss": 4.500632286071777 }, { "id": "identity_27738", "image": "Super-CLEVR/images/superCLEVR_new_001616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of gray things right of the small metal cruiser the same as the number of green objects?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.65625, "loss": 5.151463031768799 }, { "id": "identity_267460", "image": "UniGeo/images/1579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the quadrilateral ABCD shown in the figure, where AD is parallel to BC and AC and BD intersect at point O, and EF is parallel to AD passing through point O intersecting AB at points E and F. If AE = 2, BE = 5, and OD = 3, what is the length of BD?\nChoices:\n(A) 6\n(B) 15/2\n(C) 21/2\n(D) 10/3" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.90625, "loss": 2.417961835861206 }, { "id": "identity_213093", "image": "VQA2.0/images/139658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Midday" } ], "clip_score": 11.15625, "loss": 2.7119686603546143 }, { "id": "identity_1002", "image": "AI2D/images/4167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many stages are shown in the cycle?\nChoices:\n(A) 67\n(B) 0\n(C) 9\n(D) 5" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.1875, "loss": 2.5922160148620605 }, { "id": "identity_205928", "image": "PMC-VQA/images/PMC3635941_F3_200806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible abnormality in the MRI image?" }, { "from": "gpt", "value": "The answer is No visible abnormality" } ], "clip_score": 30.8125, "loss": 2.970574378967285 }, { "id": "identity_144113", "image": "MapQA/images/map_2935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range of defendants in the state of Michigan?" }, { "from": "gpt", "value": "The answer is 74.6%-75.2%" } ], "clip_score": 36.46875, "loss": 1.7881356477737427 }, { "id": "identity_69241", "image": "AI2D/images/4330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common name for the fruit labeled as \"Noni\" in the image?" }, { "from": "gpt", "value": "The answer is Indian Mulberry" } ], "clip_score": 26.203125, "loss": 3.308117389678955 }, { "id": "identity_85042", "image": "Geometry3K/images/1298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side KL in triangle JKL?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 34.53125, "loss": 2.552492141723633 }, { "id": "identity_163604", "image": "GeoQA+/images/3388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with BD as a median, if BE is twice the length of DE and BC is 6, what is the length of segment BF?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.125, "loss": 2.2203547954559326 }, { "id": "identity_245074", "image": "TextVQA/images/306ab0290a2df966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the model number of the bulldozer with the word \"case\" on its neck?" }, { "from": "gpt", "value": "The answer is 580" } ], "clip_score": 22.296875, "loss": 1.6742210388183594 }, { "id": "identity_21229", "image": "GeoQA+/images/4974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,D、E分别是AB和AC的中点,S四边形BCED=15,则S△ABC=()\nChoices:\n(A) 30\n(B) 25\n(C) 22.5\n(D) 20" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.234375, "loss": 2.596367359161377 }, { "id": "identity_2483", "image": "A-OKVQA/images/207491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what setting does this bus drive?\nChoices:\n(A) rural\n(B) sand desert\n(C) urban\n(D) industrial" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5390625, "loss": 4.310641288757324 }, { "id": "identity_212244", "image": "TextVQA/images/27f04a83912ca745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any Pepsi bottles with a different label design?" }, { "from": "gpt", "value": "The answer is Yes, one bottle has a \"Pepsi Cherry Vanilla\" label" } ], "clip_score": 32.03125, "loss": 2.5887258052825928 }, { "id": "identity_324288", "image": "A-OKVQA/images/174019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it safe to cross here at this moment?\nChoices:\n(A) unknown\n(B) no\n(C) perhaps\n(D) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.234375, "loss": 3.7743144035339355 }, { "id": "identity_174795", "image": "IconQA/images/18031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 4 small squares, how many squares will remain?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 27.015625, "loss": 2.386462450027466 }, { "id": "identity_114217", "image": "TextVQA/images/0a1587205dff467f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the coin appear to be in a protective cover?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.421875, "loss": 3.2799289226531982 }, { "id": "identity_157778", "image": "TQA/images/abc_question_images/continental_drift_19083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which section will evolve into the continent of Australia?" }, { "from": "gpt", "value": "The answer is Section K" } ], "clip_score": 31.859375, "loss": 3.795910120010376 }, { "id": "identity_220912", "image": "VQA-AS/images/6242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are on the sofa?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 26.96875, "loss": 4.586691379547119 }, { "id": "identity_306833", "image": "Geometry3K/images/1229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 12" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.703125, "loss": 2.7314891815185547 }, { "id": "identity_307397", "image": "A-OKVQA/images/71281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the type of fencing used to contain these animals?\nChoices:\n(A) wood\n(B) iron\n(C) wire\n(D) electric" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.078125, "loss": 3.9350924491882324 }, { "id": "identity_241390", "image": "VQA2.0/images/203602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the knife and the surface the apple is on?" }, { "from": "gpt", "value": "The answer is approximately 30 degrees" } ], "clip_score": 32.78125, "loss": 2.0029051303863525 }, { "id": "identity_25792", "image": "IconQA/images/14739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dots are there?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 29.453125, "loss": 1.8606245517730713 }, { "id": "identity_259020", "image": "DocVQA/images/qqjw0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of unpaid/uncompleted change orders?" }, { "from": "gpt", "value": "The answer is $53,775" } ], "clip_score": 23.5, "loss": 2.3181817531585693 }, { "id": "identity_333879", "image": "UniGeo/images/334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图1,在⊙O中,若点C是⁀{AB}中点,∠OAB=50°,则∠BOC的度数是多少?\n选项:\n(A) 40°\n(B) 45°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.703125, "loss": 2.548595905303955 }, { "id": "identity_285159", "image": "AI2D/images/4663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the largest part among the options provided? \n(A) b\n(B) i\n(C) l\n(D) j" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.90625, "loss": 3.7824082374572754 }, { "id": "identity_241838", "image": "VQA-AS/images/19384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds is the boy looking at, and what other objects are near the birds?" }, { "from": "gpt", "value": "The answer is Two birds and pieces of bread" } ], "clip_score": 23.1875, "loss": 4.143657684326172 }, { "id": "identity_94836", "image": "IconQA/images/13115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of squares in the top row to the squares in the bottom row?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 28.390625, "loss": 1.2838492393493652 }, { "id": "identity_333219", "image": "PlotQA/images/84921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label or title of the horizontal axis?" }, { "from": "gpt", "value": "The answer is Years" } ], "clip_score": 17.78125, "loss": 4.1356096267700195 }, { "id": "identity_79159", "image": "GeoQA+/images/3432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is parallel to CD and AC equals BC, if angle 1 measures 70 degrees, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 33.40625, "loss": 1.629191279411316 }, { "id": "identity_106674", "image": "TextVQA/images/84b3fc5b08f24dba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red stools are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.234375, "loss": 4.443757057189941 }, { "id": "identity_206969", "image": "PMC-VQA/images/PMC7447331_F1_239979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the functional areas depicted in the same hemisphere of the brain?" }, { "from": "gpt", "value": "The answer is No, they are not all in the same hemisphere" } ], "clip_score": 25.6875, "loss": 1.6654479503631592 }, { "id": "identity_125871", "image": "A-OKVQA/images/359394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus behind the one in the foreground?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.25, "loss": 3.135124921798706 }, { "id": "identity_93379", "image": "TabMWP/images/27617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of butterscotch candies per bag?" }, { "from": "gpt", "value": "The answer is 61" } ], "clip_score": 35.375, "loss": 3.12996244430542 }, { "id": "identity_150359", "image": "AI2D/images/3971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of prey would the snake in the image likely hunt?" }, { "from": "gpt", "value": "The answer is Small mammals, birds, or rodents" } ], "clip_score": 26.5, "loss": 2.1289303302764893 }, { "id": "identity_146880", "image": "DocVQA/images/xmkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person whose application for Fellowship is mentioned?" }, { "from": "gpt", "value": "The answer is Dr. George M. Saunders" } ], "clip_score": 22.046875, "loss": 2.742814779281616 }, { "id": "identity_138844", "image": "DocVQA/images/mhpj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of response does the sender of the letter suggest the study results might be attributed to?" }, { "from": "gpt", "value": "The answer is adolescent mischievousness" } ], "clip_score": 18.21875, "loss": 3.859973192214966 }, { "id": "identity_93278", "image": "IconQA/images/34083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you arrange the shapes to form a straight line, how many sides will be on the outer edge?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 27.9375, "loss": 1.9290374517440796 }, { "id": "identity_56443", "image": "FigureQA/images/32846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have equal distribution of colors?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.375, "loss": 3.594891309738159 }, { "id": "identity_166850", "image": "TabMWP/images/20639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much would it cost to buy one of each stamp?" }, { "from": "gpt", "value": "The answer is $0.25" } ], "clip_score": 28.4375, "loss": 1.6989498138427734 }, { "id": "identity_81475", "image": "UniGeo/images/779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point O is the center of the inscribed circle of triangle ABC, and the radius of the circle is 4 units, what is the length of the altitude from vertex A to side BC?" }, { "from": "gpt", "value": "The answer is 4√3 units" } ], "clip_score": 33.625, "loss": 1.8437955379486084 }, { "id": "identity_281439", "image": "TabMWP/images/31936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money required by Darrell to purchase a guitar, a piano, and a trombone?" }, { "from": "gpt", "value": "The answer is 6,783" } ], "clip_score": 32.25, "loss": 2.7518744468688965 }, { "id": "identity_76530", "image": "TQA/images/question_images/human_system_digestive_1395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the organ that stores undigested food before it is excreted?" }, { "from": "gpt", "value": "The answer is Large Intestine" } ], "clip_score": 30.40625, "loss": 1.5705515146255493 }, { "id": "identity_157171", "image": "ScienceQA/images/12046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the north?" }, { "from": "gpt", "value": "The answer is Oregon" } ], "clip_score": 31.3125, "loss": 3.1540515422821045 }, { "id": "identity_188844", "image": "PMC-VQA/images/PMC5646301_fig7_228418.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label \"S1\" referring to in the image?" }, { "from": "gpt", "value": "The answer is First sacral vertebra" } ], "clip_score": 26.703125, "loss": 2.9958019256591797 }, { "id": "identity_317770", "image": "UniGeo/images/1670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,有一个平行四边形和一个正方形,其中一个点在一条边上。若一个角为35°,另一个角为15°,则∠B的度数是多少?()\n选项:\n(A) 50\n(B) 55\n(C) 70\n(D) 75" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.78125, "loss": 2.561600923538208 }, { "id": "identity_308323", "image": "UniGeo/images/1262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,PA、PB分别与半径为3的圆O相切于点A,B,直线CD分别交PA、PB于点C,D,并切圆O于点E,当PO=5时,△PCD的周长为()\n选项:\n(A) 4\n(B) 5\n(C) 8\n(D) 10" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.375, "loss": 2.4201443195343018 }, { "id": "identity_255481", "image": "ChartQA/images/two_col_1092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined global revenue share of North America, Western Europe, and Japan regions for Guicci?" }, { "from": "gpt", "value": "The answer is 51%" } ], "clip_score": 33.78125, "loss": 1.828330636024475 }, { "id": "identity_241248", "image": "UniGeo/images/495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示, 圆锥的母线长为10cm, 其侧面展开图是圆心角为216°的扇形, 求该圆锥的底面半径和高。" }, { "from": "gpt", "value": "The answer is 底面半径为6cm, 高为8cm" } ], "clip_score": 25.203125, "loss": 1.0614484548568726 }, { "id": "identity_27398", "image": "MapQA/images/map_3378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value in the Northeast ?" }, { "from": "gpt", "value": "The answer is 48.0%-49.7%" } ], "clip_score": 24.125, "loss": 2.3697752952575684 }, { "id": "identity_334401", "image": "PMC-VQA/images/PMC8124336_ijms-22-04818-f003_441960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black asterisk in the image indicate?\nChoices:\n(A) Connective tissue \n(B) M2 macrophages \n(C) Newly formed bone \n(D) Xenogeneic BSM granules" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.203125, "loss": 4.583466529846191 }, { "id": "identity_308814", "image": "GeoQA+/images/900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,AB=BC,∠ABC=120°,AD为⊙O的直径,AD=10,那么BD是多少?\nChoices:\n(A) 8\n(B) 5\n(C) 8√{3}\n(D) 5√{3}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.3125, "loss": 2.4632320404052734 }, { "id": "identity_31600", "image": "TQA/images/question_images/human_system_circulatory_3637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many types of artery are shown in the diagram?\nChoices:\n(A) 2\n(B) 4\n(C) 1\n(D) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.296875, "loss": 3.4928066730499268 }, { "id": "identity_168568", "image": "TabMWP/images/32395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the price decreases from $370 to $270, by how much does the quantity demanded increase?" }, { "from": "gpt", "value": "The answer is 1,600" } ], "clip_score": 32.28125, "loss": 1.8482296466827393 }, { "id": "identity_172558", "image": "TabMWP/images/10093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did Josiah walk an even or odd number of kilometers on most days?" }, { "from": "gpt", "value": "The answer is Odd" } ], "clip_score": 25.71875, "loss": 2.206503391265869 }, { "id": "identity_245605", "image": "IconQA/images/17311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that the spinner will land on green if it is spun twice?" }, { "from": "gpt", "value": "The answer is 1/9" } ], "clip_score": 28.234375, "loss": 2.3041677474975586 }, { "id": "identity_195885", "image": "VizWiz/images/VizWiz_train_00014928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different food items are on the plate?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.4375, "loss": 2.1768245697021484 }, { "id": "identity_317637", "image": "TabMWP/images/10226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of donors are at the bronze level according to the Princeton Symphony's categorization? Simplify your answer." }, { "from": "gpt", "value": "The answer is 1/27" } ], "clip_score": 33.25, "loss": 2.459038496017456 }, { "id": "identity_271399", "image": "FigureQA/images/59490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Steel Blue the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.84375, "loss": 3.9713387489318848 }, { "id": "identity_149894", "image": "DocVQA/images/slyc0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a Motor Transport section listed in the document?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.59375, "loss": 3.090914726257324 }, { "id": "identity_15539", "image": "TextVQA/images/51d58f6342cf64c1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what kind of survival kit is pictured?" }, { "from": "gpt", "value": "The answer is shemergency" } ], "clip_score": 17.234375, "loss": 3.5780558586120605 }, { "id": "identity_47400", "image": "DVQA/images/bar_train_00128322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many datasets have an accuracy of 60 or higher for the 'mode' algorithm?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.09375, "loss": 2.308184862136841 }, { "id": "identity_59553", "image": "DocVQA/images/gldj0038_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the G/L code mentioned on the voucher?" }, { "from": "gpt", "value": "The answer is 04-90-0218-9010-774" } ], "clip_score": 22.5625, "loss": 2.44783616065979 }, { "id": "identity_277366", "image": "AI2D/images/3599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the digestive system extends from the mouth and nasal cavities to the larynx, where it connects to the esophagus? \n(A) The pharynx.\n(B) The bronchus.\n(C) The trachea.\n(D) The diaphragm." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.46875, "loss": 3.017808675765991 }, { "id": "identity_77156", "image": "AI2D/images/3089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dendrites are visible in this image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.375, "loss": 4.3158721923828125 }, { "id": "identity_275395", "image": "VQA2.0/images/99313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this animal?" }, { "from": "gpt", "value": "The answer is giraffe" } ], "clip_score": 17.9375, "loss": 2.272747278213501 }, { "id": "identity_95690", "image": "TabMWP/images/9499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many farmers have more than 32 sheep?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.28125, "loss": 2.436953544616699 }, { "id": "identity_202153", "image": "TextVQA/images/3811285c421d2ad2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is taking place at the table?" }, { "from": "gpt", "value": "The answer is LEGO building activity" } ], "clip_score": 23.453125, "loss": 3.3924105167388916 }, { "id": "identity_171034", "image": "TabMWP/images/7843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more pigs does Middle Farm have compared to Hill Farm?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 25.9375, "loss": 2.213853597640991 }, { "id": "identity_83561", "image": "GeoQA+/images/4926.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC in the given diagram?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 30.296875, "loss": 1.7116146087646484 }, { "id": "identity_109102", "image": "VQA2.0/images/111235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is At least 5" } ], "clip_score": 17.203125, "loss": 2.945887327194214 }, { "id": "identity_65332", "image": "ScienceQA/images/13788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is located to the southeast of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 26.734375, "loss": 3.047639846801758 }, { "id": "identity_36852", "image": "DVQA/images/bar_train_00051537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 27.375, "loss": 2.676516532897949 }, { "id": "identity_270802", "image": "VQA2.0/images/408846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are they currently participating in a march?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 13.078125, "loss": 3.2410950660705566 }, { "id": "identity_127796", "image": "PMC-VQA/images/PMC6541958_fig2_487237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is likely used to capture this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 29.34375, "loss": 2.937257766723633 }, { "id": "identity_32457", "image": "TabMWP/images/37116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For a statistics project, Sasha observed the number of hits and misses scored by some baseball players during batting practice. How many pitches did the right fielder have?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 39.1875, "loss": 2.7550926208496094 }, { "id": "identity_210804", "image": "A-OKVQA/images/200033.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.375, "loss": 3.3099522590637207 }, { "id": "identity_232975", "image": "DVQA/images/bar_train_00174757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of all the algorithms, and how much does the accuracy of the most accurate algorithm exceed this average?" }, { "from": "gpt", "value": "The answer is 3.17" } ], "clip_score": 37.8125, "loss": 2.653271198272705 }, { "id": "identity_315524", "image": "GeoQA+/images/847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如右图,在一个三角形中,已知一个角是80°、另一个角是60°,DE∥BC,那么∠AED的大小是()\n\nChoices:\n(A) 40°\n(B) 60°\n(C) 80°\n(D) 120°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0625, "loss": 2.7196707725524902 }, { "id": "identity_34448", "image": "PMC-VQA/images/PMC5337383_fig1_134483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging finding in the picture? \nChoices:\n(A) A small mass with no impact on surrounding organs. \n(B) A large mass that has infiltrated surrounding vital organs. \n(C) A large mass with multiple areas of low attenuation without impacting vital organs. \n(D) A large mass with multiple areas of low attenuation that has invaded the heart and other vital organs. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.109375, "loss": 3.5420985221862793 }, { "id": "identity_211169", "image": "A-OKVQA/images/466083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many baseball bats can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.484375, "loss": 4.303469181060791 }, { "id": "identity_259524", "image": "DocVQA/images/kmnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the deadline for the Epidemiology Summer Session Faculty to return the enclosed listing with their notations?" }, { "from": "gpt", "value": "The answer is May 24" } ], "clip_score": 18.390625, "loss": 2.7377805709838867 }, { "id": "identity_176071", "image": "IconQA/images/30499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern sequence of the images?" }, { "from": "gpt", "value": "The answer is crab, blue bucket, green bucket" } ], "clip_score": 32.65625, "loss": 2.7275264263153076 }, { "id": "identity_195237", "image": "PMC-VQA/images/PMC7213657_FIG3_166476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the yellow star in the image?" }, { "from": "gpt", "value": "The answer is Marker for area of interest or abnormality" } ], "clip_score": 18.328125, "loss": 3.030118227005005 }, { "id": "identity_336032", "image": "ChartQA/images/7480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the largest segment value twice the value of the Major reduction segment?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.046875, "loss": 3.4335052967071533 }, { "id": "identity_164491", "image": "UniGeo/images/1214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the circumference of the circle is 24π, what is the length of segment EP?" }, { "from": "gpt", "value": "The answer is 12√5" } ], "clip_score": 30.046875, "loss": 2.440711498260498 }, { "id": "identity_243360", "image": "TabMWP/images/31147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average rate of change in the number of emails Cassie received per day over the entire week?" }, { "from": "gpt", "value": "The answer is -0.75" } ], "clip_score": 33.96875, "loss": 2.553957223892212 }, { "id": "identity_157717", "image": "ScienceQA/images/1791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dunes are visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 28.171875, "loss": 4.266891002655029 }, { "id": "identity_310882", "image": "GeoQA+/images/5257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在▱ABCD中,∠ABC和∠BCD的平分线交边AD于点E,且BE=12,CE=5,则点A到BC的距离是多少?\n\nChoices:\n(A) \\frac{12}{5}\n(B) 4\n(C) \\frac{60}{13}\n(D) \\frac{60}{7}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.84375, "loss": 2.3500099182128906 }, { "id": "identity_64047", "image": "TQA/images/question_images/atomic_mass_number_9002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the atomic number of Cl?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 27.203125, "loss": 1.5631294250488281 }, { "id": "identity_113514", "image": "Super-CLEVR/images/superCLEVR_new_000641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any airplanes in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.484375, "loss": 3.1608054637908936 }, { "id": "identity_235104", "image": "A-OKVQA/images/173161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which country, known for its ancient inventions, were umbrellas most likely invented, and what other significant invention is attributed to this country?" }, { "from": "gpt", "value": "The answer is China, and another significant invention is paper" } ], "clip_score": 22.421875, "loss": 1.7692323923110962 }, { "id": "identity_294035", "image": "A-OKVQA/images/469134.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the lunch option for the bikers shown here? Will it be hot dogs, steak, pizza, or salads?\nChoices:\n(A) hot dogs\n(B) steak\n(C) pizza\n(D) salads" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.71875, "loss": 3.7860405445098877 }, { "id": "identity_161821", "image": "GeoQA+/images/2359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is the diameter of circle O and ∠ACD=30°, what is the length of AD if AC=10 units?" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 33.6875, "loss": 1.872248649597168 }, { "id": "identity_271786", "image": "UniGeo/images/3332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, points A, B, and C lie on circle O, with line segment ABCO. If ∠AOC is 140°, what is the measure of ∠B? \nChoices:\n(A) 140°\n(B) 120°\n(C) 110°\n(D) 130°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 40.34375, "loss": 2.5288233757019043 }, { "id": "identity_179691", "image": "IconQA/images/72968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock is 10 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 5:50" } ], "clip_score": 30.390625, "loss": 1.5106604099273682 }, { "id": "identity_195565", "image": "TextVQA/images/8b7c688892e36f16.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two predominant colors of the team uniforms visible in the image?" }, { "from": "gpt", "value": "The answer is White and red" } ], "clip_score": 20.015625, "loss": 3.5381414890289307 }, { "id": "identity_87870", "image": "GeoQA+/images/592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, with CD as the height from C to AB, and CM as the angle bisector of angle ACB, what is the ratio of AD to DB?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 37.5, "loss": 2.0371875762939453 }, { "id": "identity_267520", "image": "TextVQA/images/18ddf932b546a292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which number is considered the bottom number?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 13.296875, "loss": 2.4522407054901123 }, { "id": "identity_300652", "image": "DocVQA/images/tlnf0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are these investments based on?" }, { "from": "gpt", "value": "The answer is short-term" } ], "clip_score": 14.671875, "loss": 3.633047342300415 }, { "id": "identity_211121", "image": "PMC-VQA/images/PMC9356014_Fig1_365449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure could these images be showing?" }, { "from": "gpt", "value": "The answer is Blood vessels or ducts within an organ" } ], "clip_score": 29.3125, "loss": 3.1577813625335693 }, { "id": "identity_311253", "image": "VQA2.0/images/518207.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there something here that will keep drinks cold?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 14.0625, "loss": 2.962245225906372 }, { "id": "identity_88702", "image": "UniGeo/images/271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠3 is 55°, what is the relationship between ∠2 and ∠4?" }, { "from": "gpt", "value": "The answer is ∠2 and ∠4 are congruent" } ], "clip_score": 30.625, "loss": 1.2092467546463013 }, { "id": "identity_291428", "image": "TQA/images/question_images/faults_1734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number of different types of walls available?\nChoices:\n(A) 1\n(B) 3\n(C) 4\n(D) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.4375, "loss": 3.5011894702911377 }, { "id": "identity_13611", "image": "DVQA/images/bar_train_00179367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars are there?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 19.625, "loss": 3.768782138824463 }, { "id": "identity_91977", "image": "IconQA/images/41415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is spun 60 times, how many times is it unlikely to land on blue?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 26.703125, "loss": 2.352139711380005 }, { "id": "identity_77408", "image": "ScienceQA/images/9970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of California?" }, { "from": "gpt", "value": "The answer is Sacramento" } ], "clip_score": 22.984375, "loss": 2.038645029067993 }, { "id": "identity_271076", "image": "ScienceQA/images/5380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this clipboard?\nChoices:\n(A) wood\n(B) concrete" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.984375, "loss": 4.506109714508057 }, { "id": "identity_27554", "image": "IconQA/images/18005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a square with this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.265625, "loss": 4.246384620666504 }, { "id": "identity_260481", "image": "PMC-VQA/images/PMC3795359_F2_237149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of a sagittal section of the brain, and how do they compare to the features observed in the image provided?" }, { "from": "gpt", "value": "The answer is Sagittal section shows side view with corpus callosum, cerebellum, brainstem" } ], "clip_score": 20.96875, "loss": 2.2859110832214355 }, { "id": "identity_171553", "image": "IconQA/images/72975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you arrange the marbles in a straight line, how many different arrangements are possible?" }, { "from": "gpt", "value": "The answer is 6 arrangements" } ], "clip_score": 24.953125, "loss": 2.463848352432251 }, { "id": "identity_188659", "image": "A-OKVQA/images/447179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ribbon on the cake?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.5625, "loss": 3.2961838245391846 }, { "id": "identity_294023", "image": "ScienceQA/images/4923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is being indicated in the highlighted area?\nChoices:\n(A) Australia\n(B) Antarctica\n(C) Africa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.3125, "loss": 4.701534271240234 }, { "id": "identity_81038", "image": "Geometry3K/images/132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC in the circle?" }, { "from": "gpt", "value": "The answer is 46 degrees" } ], "clip_score": 35.28125, "loss": 2.6198060512542725 }, { "id": "identity_194778", "image": "PMC-VQA/images/PMC4144387_fig1_315613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the arrow pointing to in the image labeled (d)?" }, { "from": "gpt", "value": "The answer is Hyperdense area in the brain" } ], "clip_score": 29.484375, "loss": 3.4080810546875 }, { "id": "identity_83667", "image": "GeoQA+/images/641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the sector used to form the cone?" }, { "from": "gpt", "value": "The answer is 16*pi cm^2" } ], "clip_score": 30.75, "loss": 2.549832582473755 }, { "id": "identity_244532", "image": "A-OKVQA/images/123555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the bus route, and what does the sign beside the green light forbid?" }, { "from": "gpt", "value": "The answer is North, left turns" } ], "clip_score": 23.390625, "loss": 5.931435585021973 }, { "id": "identity_202262", "image": "A-OKVQA/images/229149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is taking place in the image?" }, { "from": "gpt", "value": "The answer is A cycling race event" } ], "clip_score": 28.984375, "loss": 3.229172945022583 }, { "id": "identity_103615", "image": "TextVQA/images/04d3cddda381ef11.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time remaining in the first half of the game?" }, { "from": "gpt", "value": "The answer is 6 minutes and 56 seconds" } ], "clip_score": 15.34375, "loss": 1.9558581113815308 }, { "id": "identity_97041", "image": "IconQA/images/97665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual paper clips are not part of the chain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.109375, "loss": 2.3949759006500244 }, { "id": "identity_326513", "image": "IconQA/images/15646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 19.9375, "loss": 2.122178554534912 }, { "id": "identity_148391", "image": "DocVQA/images/zzml0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is LVP Jr" } ], "clip_score": 19.8125, "loss": 6.420950889587402 }, { "id": "identity_53890", "image": "FigureQA/images/67815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Blue line always above the Red line throughout the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.28125, "loss": 3.2443175315856934 }, { "id": "identity_307329", "image": "Super-CLEVR/images/superCLEVR_new_000564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the small rubber scooter the same color as the big thing to the left of the big brown rubber articulated bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.5, "loss": 5.188022136688232 }, { "id": "identity_49431", "image": "FigureQA/images/55586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Orchid section in the pie chart?" }, { "from": "gpt", "value": "The answer is 75%" } ], "clip_score": 38.71875, "loss": 1.6783781051635742 }, { "id": "identity_336033", "image": "VQA2.0/images/511930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing aprons?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.015625, "loss": 2.496978998184204 }, { "id": "identity_168230", "image": "IconQA/images/32977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the paper clips if one paper clip is approximately 3 centimeters long?" }, { "from": "gpt", "value": "The answer is 9 centimeters" } ], "clip_score": 31.921875, "loss": 1.8503665924072266 }, { "id": "identity_208701", "image": "VQA-AS/images/17248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is being cooked on the grill?" }, { "from": "gpt", "value": "The answer is Sausages" } ], "clip_score": 25.375, "loss": 2.175079345703125 }, { "id": "identity_161379", "image": "GeoQA+/images/5652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the measure of angle AOB is 120 degrees, what is the measure of angle COD?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 26.734375, "loss": 1.3657292127609253 }, { "id": "identity_121248", "image": "PMC-VQA/images/PMC6022407_Fig8_334272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are predominantly visible in the lower layer of the microphotograph?" }, { "from": "gpt", "value": "The answer is Fibroblasts" } ], "clip_score": 30.21875, "loss": 2.3496508598327637 }, { "id": "identity_306055", "image": "VizWiz/images/VizWiz_train_00019301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the date of expiration" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 12.9296875, "loss": 3.989811420440674 }, { "id": "identity_254628", "image": "TextVQA/images/ecf5a361d8aa85cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the concept stated on the large hanging banner relate to the activities and interactions of the people in the image?" }, { "from": "gpt", "value": "The answer is People are engaged in discussions and sharing information" } ], "clip_score": 21.53125, "loss": 3.4228031635284424 }, { "id": "identity_109188", "image": "A-OKVQA/images/327466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the boats in the marina?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.5, "loss": 3.1206259727478027 }, { "id": "identity_305316", "image": "VQA-RAD/images/synpic31259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is something present in this xray?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 26.578125, "loss": 3.1625816822052 }, { "id": "identity_286040", "image": "TextVQA/images/cf2ab13b09dc1f54.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which league is this child associated with?" }, { "from": "gpt", "value": "The answer is wroba youth" } ], "clip_score": 25.390625, "loss": 5.432494163513184 }, { "id": "identity_196563", "image": "TextVQA/images/03625040b2eef2a4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals can be seen wearing striped uniforms?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 15.703125, "loss": 4.589463710784912 }, { "id": "identity_115690", "image": "PMC-VQA/images/PMC7789496_Fig1_336679.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used for this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 20.59375, "loss": 4.207422256469727 }, { "id": "identity_8369", "image": "AI2D/images/1130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Name the part found inside ovary\nChoices:\n(A) Style\n(B) Ovule\n(C) Sigma\n(D) Sepal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 3.8259711265563965 }, { "id": "identity_63783", "image": "ScienceQA/images/20418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What physical feature helps the animal blend into its environment?" }, { "from": "gpt", "value": "The answer is White fur" } ], "clip_score": 28.8125, "loss": 3.053443193435669 }, { "id": "identity_243220", "image": "GeoQA+/images/55.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O的圆心角∠BOC=112°,点D在弦BA的延长线上且AD=AC,求∠CAD的度数。" }, { "from": "gpt", "value": "The answer is 68°" } ], "clip_score": 26.375, "loss": 1.959458827972412 }, { "id": "identity_187877", "image": "VQA2.0/images/218406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the bedspread?" }, { "from": "gpt", "value": "The answer is Zigzag" } ], "clip_score": 29.59375, "loss": 2.3053362369537354 }, { "id": "identity_216864", "image": "Super-CLEVR/images/superCLEVR_new_000273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 25.078125, "loss": 4.610104560852051 }, { "id": "identity_161355", "image": "GeoQA+/images/361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle BCD is 25 degrees, what is the measure of angle BAD?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 35.71875, "loss": 1.7478766441345215 }, { "id": "identity_274763", "image": "A-OKVQA/images/453409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the tower located behind the central festival decoration used for broadcasting?\nChoices:\n(A) cellular service\n(B) radar\n(C) television\n(D) radio" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.015625, "loss": 4.184623718261719 }, { "id": "identity_153971", "image": "TQA/images/question_images/seasons_6287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What season is represented at the bottom left of the diagram?" }, { "from": "gpt", "value": "The answer is Summer" } ], "clip_score": 33.0, "loss": 3.7672219276428223 }, { "id": "identity_164935", "image": "GeoQA+/images/3326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOB in the given image?" }, { "from": "gpt", "value": "The answer is 72" } ], "clip_score": 32.71875, "loss": 2.881420135498047 }, { "id": "identity_225802", "image": "UniGeo/images/2694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,平行四边形ABCD的对角线交于点O,且AB=6,△OCD的周长为23,若平行四边形ABCD的面积为48,则平行四边形ABCD的两条对角线的乘积是多少?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 25.640625, "loss": 2.322000026702881 }, { "id": "identity_243626", "image": "A-OKVQA/images/213592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sport are these men on horseback playing, and what equipment are they using to play it?" }, { "from": "gpt", "value": "The answer is Polo, using mallets" } ], "clip_score": 31.25, "loss": 2.9940855503082275 }, { "id": "identity_28898", "image": "ScienceQA/images/7408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these magnets attract or repel each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.1875, "loss": 3.5698235034942627 }, { "id": "identity_62791", "image": "AI2D/images/2351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In this cycle, what comes after the \"seed\" stage?" }, { "from": "gpt", "value": "The answer is Flower" } ], "clip_score": 32.625, "loss": 2.4558136463165283 }, { "id": "identity_159310", "image": "GeoQA+/images/39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: With the given information that ∠A = ∠CDE and AD = DC, what is the length of segment DE?" }, { "from": "gpt", "value": "The answer is 10m" } ], "clip_score": 33.375, "loss": 2.8460044860839844 }, { "id": "identity_135798", "image": "FigureQA/images/19576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the graph?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 27.21875, "loss": 3.363495349884033 }, { "id": "identity_182494", "image": "A-OKVQA/images/369504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car parked closest to the camera?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.015625, "loss": 3.0973424911499023 }, { "id": "identity_18730", "image": "ScienceQA/images/15014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Rhode Island?\nChoices:\n(A) Trenton\n(B) Montpelier\n(C) Providence\n(D) Newport" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.390625, "loss": 4.140719890594482 }, { "id": "identity_195351", "image": "PMC-VQA/images/PMC4846987_f5_24859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the images symmetrical or asymmetrical?" }, { "from": "gpt", "value": "The answer is Asymmetrical" } ], "clip_score": 20.328125, "loss": 1.9069106578826904 }, { "id": "identity_31372", "image": "GeoQA+/images/4239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线a、b分别经过等边三角形ABC的顶点A、C,且a∥b,∠1=42°,则∠2的度数为()\nChoices:\n(A) 18°\n(B) 42°\n(C) 60°\n(D) 102°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.6875, "loss": 2.7907662391662598 }, { "id": "identity_213029", "image": "TextVQA/images/5af1cba38746b57c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible ice in the glass?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.53125, "loss": 3.0627055168151855 }, { "id": "identity_95266", "image": "IconQA/images/56709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past the hour is it?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 30.078125, "loss": 2.1027209758758545 }, { "id": "identity_224396", "image": "FigureQA/images/50254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the Royal Blue and Purple sections?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 33.5625, "loss": 1.7246872186660767 }, { "id": "identity_321164", "image": "PMC-VQA/images/PMC3116541_F0003_99168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the optic disc does the medial PCA supply blood to?\nChoices:\n(A) Temporal half\n(B) Nasal half\n(C) Upper half\n(D) Lower half" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.6875, "loss": 4.085528373718262 }, { "id": "identity_128201", "image": "TextVQA/images/66dee7a7487682a6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is prominently displayed on the banners around the podium?" }, { "from": "gpt", "value": "The answer is Emirates" } ], "clip_score": 28.53125, "loss": 2.1308035850524902 }, { "id": "identity_153856", "image": "ScienceQA/images/812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean is located at 60°N and 100°W?" }, { "from": "gpt", "value": "The answer is Arctic Ocean" } ], "clip_score": 27.96875, "loss": 2.56270170211792 }, { "id": "identity_216654", "image": "TextVQA/images/34afc4d984a1a681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is advertised on the billboard in the background?" }, { "from": "gpt", "value": "The answer is Origin" } ], "clip_score": 14.1796875, "loss": 4.033825874328613 }, { "id": "identity_24144", "image": "TabMWP/images/18250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for 1 pound of double chocolate cookie dough?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.875, "loss": 2.9167606830596924 }, { "id": "identity_146176", "image": "DocVQA/images/rxgm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason given for a potential delay in response?" }, { "from": "gpt", "value": "The answer is Mary Teloh had a baby girl" } ], "clip_score": 19.21875, "loss": 5.70749044418335 }, { "id": "identity_130223", "image": "A-OKVQA/images/374685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the bags inside the cages?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 25.75, "loss": 4.043757438659668 }, { "id": "identity_186511", "image": "VQA2.0/images/67749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus in the image?" }, { "from": "gpt", "value": "The answer is Green and white" } ], "clip_score": 27.8125, "loss": 2.6131930351257324 }, { "id": "identity_310615", "image": "AI2D/images/2959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the body opening through which food is taken in by an animal?\nChoices:\n(A) tonsil\n(B) hard palate\n(C) mouth\n(D) tongue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.390625, "loss": 4.069561958312988 }, { "id": "identity_280491", "image": "MapQA/images/map_9238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Pennsylvania the state bordering New Jersey with the lowest value among them?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.25, "loss": 2.822007656097412 }, { "id": "identity_282341", "image": "VQA-AS/images/19047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of his shirt?" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 18.234375, "loss": 3.0917763710021973 }, { "id": "identity_277886", "image": "AI2D/images/3197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the plant is located above the soil in the given system? \n(A) flower \n(B) leaf \n(C) shoot system \n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.75, "loss": 3.753598690032959 }, { "id": "identity_229222", "image": "VQA-RAD/images/synpic22097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What abnormalities can be identified in this chest X-ray, and what might they indicate about the patient's health condition?" }, { "from": "gpt", "value": "The answer is Possible pleural effusion or consolidation in the left lung field" } ], "clip_score": 34.375, "loss": 3.4150235652923584 }, { "id": "identity_201017", "image": "A-OKVQA/images/448694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the team uniform of the player attempting to catch the disc?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.5, "loss": 3.4876179695129395 }, { "id": "identity_127219", "image": "VQA2.0/images/512037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation is seen on the banks of the river?" }, { "from": "gpt", "value": "The answer is Grass and shrubs" } ], "clip_score": 19.734375, "loss": 2.610342025756836 }, { "id": "identity_216750", "image": "A-OKVQA/images/172886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bath mat on the floor?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 24.8125, "loss": 3.398326873779297 }, { "id": "identity_167621", "image": "TabMWP/images/38165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average time Cody spent on homework per day from Sunday to Thursday?" }, { "from": "gpt", "value": "The answer is 34.4 minutes" } ], "clip_score": 36.90625, "loss": 1.8007221221923828 }, { "id": "identity_251412", "image": "TextVQA/images/d383dd516697e184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many monitors display the word \"five\" and what is the total number of unique words displayed across all monitors?" }, { "from": "gpt", "value": "The answer is 5 monitors, 3 unique words" } ], "clip_score": 24.75, "loss": 2.6187219619750977 }, { "id": "identity_247884", "image": "VQA2.0/images/52497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three specific items in the image that indicate this is a kitchen and explain how each item supports your conclusion" }, { "from": "gpt", "value": "The answer is Stove, sink, kitchen counter" } ], "clip_score": 23.453125, "loss": 3.433293104171753 }, { "id": "identity_254442", "image": "A-OKVQA/images/160740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three distinct visual elements in the photo that indicate the country is Japan?" }, { "from": "gpt", "value": "The answer is Japanese characters, cherry blossoms, urban layout" } ], "clip_score": 28.9375, "loss": 3.0863192081451416 }, { "id": "identity_253082", "image": "MapQA/images/map_7473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a number of losses in the range of 69-86, and what is the total number of states with losses in the range of 54-66 and 36-51 combined?" }, { "from": "gpt", "value": "The answer is 6 states, 12 states" } ], "clip_score": 32.25, "loss": 2.3887436389923096 }, { "id": "identity_266703", "image": "IconQA/images/101278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of shapes available to choose from?\nChoices:\n(A) 2\n(B) 3\n(C) 1" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5, "loss": 3.5747275352478027 }, { "id": "identity_338236", "image": "UniGeo/images/44.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,AB=AD=DC,∠B=70°,则∠C的度数是多少?\n选项:\n(A) 35°\n(B) 40°\n(C) 45°\n(D) 50°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.765625, "loss": 2.569121837615967 }, { "id": "identity_334583", "image": "A-OKVQA/images/106453.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which era was this image captured?\nChoices:\n(A) frontier days\n(B) prehistoric\n(C) modern\n(D) victorian" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.1875, "loss": 4.65606164932251 }, { "id": "identity_131912", "image": "A-OKVQA/images/422326.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the photo appear to be taken?" }, { "from": "gpt", "value": "The answer is Evening" } ], "clip_score": 13.6328125, "loss": 3.433574914932251 }, { "id": "identity_251101", "image": "CLEVR-Math/images/CLEVR_train_013200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 8 yellow metal cylinders to the existing objects, how many total objects will there be?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 28.0625, "loss": 2.22281813621521 }, { "id": "identity_243048", "image": "A-OKVQA/images/187286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visual indicators in the image that suggest the cyclists must stop for a train crossing, and how many such indicators can you identify?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 29.21875, "loss": 5.6192626953125 }, { "id": "identity_143911", "image": "MapQA/images/map_10693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of Texas?" }, { "from": "gpt", "value": "The answer is 32.7%-43.1%" } ], "clip_score": 27.625, "loss": 1.9678618907928467 }, { "id": "identity_90834", "image": "GeoQA+/images/623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with AB = 8, BC = 12, and points D and E as midpoints of AB and AC respectively, what is the length of line segment DE?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.40625, "loss": 2.376741886138916 }, { "id": "identity_327169", "image": "MapQA/images/map_11371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of this state?" }, { "from": "gpt", "value": "The answer is 37.1%-37.3%" } ], "clip_score": 25.40625, "loss": 1.6229978799819946 }, { "id": "identity_120612", "image": "VQA-AS/images/11630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the foliage in the background?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 19.6875, "loss": 3.460800886154175 }, { "id": "identity_223724", "image": "TextVQA/images/f18ed88c75788d88.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is displayed on the banner in the background?" }, { "from": "gpt", "value": "The answer is 2006" } ], "clip_score": 12.578125, "loss": 1.4653685092926025 }, { "id": "identity_76197", "image": "TQA/images/abc_question_images/hair_follicles_16989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the narrowest part of the structure shown in the diagram?" }, { "from": "gpt", "value": "The answer is Label V" } ], "clip_score": 27.984375, "loss": 3.5285682678222656 }, { "id": "identity_125972", "image": "A-OKVQA/images/397325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man wearing on his head?" }, { "from": "gpt", "value": "The answer is A cowboy hat" } ], "clip_score": 22.015625, "loss": 2.655447244644165 }, { "id": "identity_275742", "image": "PMC-VQA/images/PMC4161720_Fig1_319755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of medical imaging was utilized for diagnosing the condition among the following options: X-Ray, MRI, PET-Scan, or CT-Scan?\nChoices:\n(A) X-Ray \n(B) MRI \n(C) PET-Scan \n(D) CT-Scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.6875, "loss": 4.0286688804626465 }, { "id": "identity_152620", "image": "ScienceQA/images/5485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the object labeled \"tissue\" most likely made of?" }, { "from": "gpt", "value": "The answer is Paper pulp" } ], "clip_score": 21.359375, "loss": 3.2100865840911865 }, { "id": "identity_176171", "image": "TabMWP/images/16075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Cora wants to increase her average number of baskets made per day to 5, how many baskets does she need to make on an additional day?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 37.84375, "loss": 2.991070032119751 }, { "id": "identity_132027", "image": "PMC-VQA/images/PMC7555047_diagnostics-10-00645-f013_271682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the arrows in the image?" }, { "from": "gpt", "value": "The answer is They indicate the presence of a nerve" } ], "clip_score": 25.140625, "loss": 3.7848331928253174 }, { "id": "identity_205316", "image": "A-OKVQA/images/523262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the ski poles in the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 21.3125, "loss": 3.3608031272888184 }, { "id": "identity_255355", "image": "A-OKVQA/images/178379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the people in the image doing, and why are they wearing black helmets?" }, { "from": "gpt", "value": "The answer is Riding horses for safety" } ], "clip_score": 28.9375, "loss": 3.6746275424957275 }, { "id": "identity_243062", "image": "VQA2.0/images/346450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sails are visible on the sailboat, and what is the color of the visible sail?" }, { "from": "gpt", "value": "The answer is One sail, blue" } ], "clip_score": 18.078125, "loss": 3.45998215675354 }, { "id": "identity_244823", "image": "GeoQA+/images/3866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,请根据尺规作图的痕迹,若AC=6,AD=2,且BD的长为4,则BC的长为多少?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 25.796875, "loss": 2.352092981338501 }, { "id": "identity_273971", "image": "MapQA/images/map_901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state bordering Connecticut has the highest value?" }, { "from": "gpt", "value": "The answer is 247-289" } ], "clip_score": 25.25, "loss": 1.9265689849853516 }, { "id": "identity_158284", "image": "TQA/images/abc_question_images/human_system_nervous_16172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter label indicates the location of the frontal lobe?" }, { "from": "gpt", "value": "The answer is W" } ], "clip_score": 29.765625, "loss": 3.240210771560669 }, { "id": "identity_262778", "image": "Geometry3K/images/1238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of the variable x shown in the figure.\nOptions:\n(A) 46.7\n(B) 60\n(C) 70\n(D) 80" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.453125, "loss": 3.2061333656311035 }, { "id": "identity_324190", "image": "MapQA/images/map_3064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Florida have the highest value among the states bordering Georgia?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.203125, "loss": 2.5609750747680664 }, { "id": "identity_322538", "image": "GeoQA+/images/1062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,∠C=90°,AC=4,tanA=\\frac{1}{2},则BC的长度是多少?\n选项:\n(A) 2\n(B) 8\n(C) 4√{3}\n(D) 4√{5}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.453125, "loss": 2.95823073387146 }, { "id": "identity_47717", "image": "FigureQA/images/42702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of Orange Red in the pie chart?" }, { "from": "gpt", "value": "The answer is Less than 1/4 but more than 1/8" } ], "clip_score": 33.09375, "loss": 2.0444860458374023 }, { "id": "identity_141508", "image": "DocVQA/images/jxpj0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Coca-Cola trademark volume in North America was low- and no-calorie in 1999?" }, { "from": "gpt", "value": "The answer is 32%" } ], "clip_score": 21.3125, "loss": 1.94673752784729 }, { "id": "identity_261296", "image": "DVQA/images/bar_train_00018790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bars?" }, { "from": "gpt", "value": "The answer is nine" } ], "clip_score": 21.859375, "loss": 3.762856960296631 }, { "id": "identity_134665", "image": "DVQA/images/bar_train_00155441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the \"feel\" and \"mean\" datasets for the \"factor\" algorithm?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 36.4375, "loss": 3.1531918048858643 }, { "id": "identity_295380", "image": "VQA2.0/images/563329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the temperature like outside?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 13.6875, "loss": 3.216010570526123 }, { "id": "identity_13327", "image": "DocVQA/images/fkkd0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the project client representative?" }, { "from": "gpt", "value": "The answer is Doug Dillman" } ], "clip_score": 19.09375, "loss": 4.182151794433594 }, { "id": "identity_220322", "image": "VQA2.0/images/271417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees can be seen in the background?" }, { "from": "gpt", "value": "The answer is More than 10" } ], "clip_score": 16.0625, "loss": 2.3666632175445557 }, { "id": "identity_50641", "image": "DVQA/images/bar_train_00024267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value reached by the category represented by green triangles?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 16.09375, "loss": 2.18768048286438 }, { "id": "identity_25620", "image": "IconQA/images/16947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beads are there?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 26.53125, "loss": 2.1810944080352783 }, { "id": "identity_303810", "image": "UniGeo/images/3102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知∠1=∠2,∠D=78°,则∠BCD是多少?\n选项:\n(A) 98°\n(B) 62°\n(C) 88°\n(D) 102°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.21875, "loss": 2.760627269744873 }, { "id": "identity_41246", "image": "PlotQA/images/138155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in gross enrollment ratio between Mexico and Philippines in Tertiary schools?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 31.0, "loss": 1.974782943725586 }, { "id": "identity_76918", "image": "ScienceQA/images/3447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the habitat of the organism shown in the image?" }, { "from": "gpt", "value": "The answer is Wetland or marshy area" } ], "clip_score": 20.40625, "loss": 2.2824337482452393 }, { "id": "identity_324315", "image": "A-OKVQA/images/56116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the white cup in front of this chair from?\nChoices:\n(A) wal mart\n(B) dunkin\n(C) wawa\n(D) starbucks" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.546875, "loss": 4.111055850982666 }, { "id": "identity_210969", "image": "Super-CLEVR/images/superCLEVR_new_000278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 13.71875, "loss": 3.1771442890167236 }, { "id": "identity_17723", "image": "ScienceQA/images/10518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these states is farthest north?\nChoices:\n(A) Arkansas\n(B) Kentucky\n(C) Arizona\n(D) Alabama" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.859375, "loss": 3.5887069702148438 }, { "id": "identity_118061", "image": "VQA2.0/images/538120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the side of the bus?" }, { "from": "gpt", "value": "The answer is Metro" } ], "clip_score": 22.59375, "loss": 4.227685928344727 }, { "id": "identity_253629", "image": "A-OKVQA/images/203986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary material of the surface the spectators are sitting on, and how many rows of seating are visible in the image?" }, { "from": "gpt", "value": "The answer is Bleachers, 4 rows" } ], "clip_score": 20.0, "loss": 3.1880722045898438 }, { "id": "identity_242831", "image": "TabMWP/images/38345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean number of math teachers across all high schools, and how does it compare to the median number of math teachers?" }, { "from": "gpt", "value": "The answer is Mean is 6.6, which is less than the median 8" } ], "clip_score": 32.84375, "loss": 2.6105711460113525 }, { "id": "identity_182461", "image": "A-OKVQA/images/261426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main protein ingredient in the sandwich?" }, { "from": "gpt", "value": "The answer is Chicken" } ], "clip_score": 29.859375, "loss": 3.112550735473633 }, { "id": "identity_242162", "image": "A-OKVQA/images/270716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which car is in the greatest danger if the black truck were to suddenly lose control and veer to its right?" }, { "from": "gpt", "value": "The answer is red sedan" } ], "clip_score": 18.859375, "loss": 4.1504974365234375 }, { "id": "identity_70500", "image": "AI2D/images/527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is represented by the arrow labeled M?" }, { "from": "gpt", "value": "The answer is Weathering and erosion" } ], "clip_score": 28.875, "loss": 2.5038416385650635 }, { "id": "identity_226933", "image": "UniGeo/images/1519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AD∥BE∥CF,直线l_{1},l_{2}与这三条平行线分别交于点A,B,C和点D,E,F,已知DE=6,若AB=3,则DF的值为多少?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 28.171875, "loss": 2.0789856910705566 }, { "id": "identity_294648", "image": "VQA2.0/images/510813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which age group do these players belong?" }, { "from": "gpt", "value": "The answer is youth" } ], "clip_score": 18.09375, "loss": 3.7236695289611816 }, { "id": "identity_291811", "image": "ChartQA/images/multi_col_1273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In 2014, what percentage of LG Electronics sales were in Korea?" }, { "from": "gpt", "value": "The answer is 13.81" } ], "clip_score": 23.609375, "loss": 2.1639981269836426 }, { "id": "identity_278869", "image": "AI2D/images/4413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of leaf has a blunt or rounded tip: acute, acuminate, truncate, or obtuse?\nChoices:\n(A) acute\n(B) acuminate\n(C) truncate\n(D) obtuse" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 38.1875, "loss": 3.6911730766296387 }, { "id": "identity_57933", "image": "PlotQA/images/117748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country had the smallest change in rural population percentage between 1960 and 1962?" }, { "from": "gpt", "value": "The answer is Channel Islands" } ], "clip_score": 30.390625, "loss": 3.9911234378814697 }, { "id": "identity_211316", "image": "A-OKVQA/images/268774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue signs can be seen in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.953125, "loss": 2.351118803024292 }, { "id": "identity_47247", "image": "FigureQA/images/12189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pie chart represents 100 units, how many units does Orchid represent?" }, { "from": "gpt", "value": "The answer is Between 45 and 49 units" } ], "clip_score": 34.4375, "loss": 2.521359443664551 }, { "id": "identity_78651", "image": "AI2D/images/1459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What happens to the magma chamber after the eruption?" }, { "from": "gpt", "value": "The answer is It becomes partially emptied" } ], "clip_score": 32.375, "loss": 3.515904664993286 }, { "id": "identity_246586", "image": "GeoQA+/images/5929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ⊙O is the circumcircle of △ABC, ∠ABO=20°, and ∠OAC=40°, find the measure of ∠BAC" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 35.71875, "loss": 1.7252354621887207 }, { "id": "identity_106074", "image": "PMC-VQA/images/PMC4141340_Fig12_314910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image taken from a male or female patient?" }, { "from": "gpt", "value": "The answer is Male" } ], "clip_score": 28.625, "loss": 3.245391845703125 }, { "id": "identity_7544", "image": "VQA-AS/images/1496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where letters are on the blocks?" }, { "from": "gpt", "value": "The answer is abcd" } ], "clip_score": 11.9453125, "loss": 3.722839117050171 }, { "id": "identity_288967", "image": "Geometry3K/images/1597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the perimeter of the triangle depicted below, rounding to the nearest tenth.\nChoices:\n(A) 222.6\n(B) 300.6\n(C) 325.6\n(D) 377.6" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.03125, "loss": 2.738095998764038 }, { "id": "identity_71882", "image": "TQA/images/question_images/human_system_eye_6132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the gel-like substance that fills the space between the lens and the retina?" }, { "from": "gpt", "value": "The answer is Vitreous gel" } ], "clip_score": 32.9375, "loss": 2.030862331390381 }, { "id": "identity_169277", "image": "TabMWP/images/6863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you distribute the walnuts from the 5th bowl equally into two new bowls, how many walnuts will each new bowl contain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.8125, "loss": 2.4962527751922607 }, { "id": "identity_183484", "image": "PMC-VQA/images/PMC7367244_Fig1_215713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the mass located in the upper or lower limb?" }, { "from": "gpt", "value": "The answer is Upper limb" } ], "clip_score": 19.671875, "loss": 2.824798822402954 }, { "id": "identity_33907", "image": "TextVQA/images/66ba90615f210fe9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what letter-number designation was given to this airplane?" }, { "from": "gpt", "value": "The answer is q9" } ], "clip_score": 18.609375, "loss": 3.042572021484375 }, { "id": "identity_320581", "image": "PMC-VQA/images/PMC4143576_F2_315480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What spinal cord segments are impacted by the spinal stenosis visible in the MRI?\nChoices:\n(A) C2 and C3\n(B) C5 and C6\n(C) T7 and T8\n(D) L2 and L3" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.75, "loss": 3.794661283493042 }, { "id": "identity_90466", "image": "UniGeo/images/816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AB=AC and ∠BCA=65°, what is the measure of ∠CDB?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 34.8125, "loss": 1.7494863271713257 }, { "id": "identity_41776", "image": "FigureQA/images/73223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values on the x-axis for Burlywood?" }, { "from": "gpt", "value": "The answer is 20 to 80" } ], "clip_score": 23.96875, "loss": 1.9063720703125 }, { "id": "identity_146424", "image": "DocVQA/images/zpyf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the staff person responsible for the project?" }, { "from": "gpt", "value": "The answer is Matthew Maxon" } ], "clip_score": 18.078125, "loss": 3.732391357421875 }, { "id": "identity_147637", "image": "DVQA/images/bar_train_00089184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference for 'brand' between 'mature' and 'jacket' categories?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.890625, "loss": 3.1045970916748047 }, { "id": "identity_285760", "image": "A-OKVQA/images/111940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the bike, how many mufflers are present?\nChoices:\n(A) two\n(B) four\n(C) one\n(D) three" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.421875, "loss": 3.4937729835510254 }, { "id": "identity_105183", "image": "VizWiz/images/VizWiz_train_00014440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the object?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 26.8125, "loss": 3.1090564727783203 }, { "id": "identity_278261", "image": "TQA/images/question_images/atomic_mass_number_9005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the atomic number of protium?\n(A) 1\n(B) 3\n(C) 4\n(D) 2" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.15625, "loss": 3.43122935295105 }, { "id": "identity_263910", "image": "TabMWP/images/29851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what time does lap swim start according to the provided schedule?\nChoices:\n(A) 5:30 P.M.\n(B) 10:15 A.M.\n(C) 11:55 A.M.\n(D) 1:25 P.M." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.96875, "loss": 3.5084125995635986 }, { "id": "identity_320974", "image": "DocVQA/images/hxxw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the telephone number mentioned here?" }, { "from": "gpt", "value": "The answer is (202) 833-1601" } ], "clip_score": 16.203125, "loss": 1.5824815034866333 }, { "id": "identity_36331", "image": "PMC-VQA/images/PMC5641467_F2_227206.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the nature of the lesion seen in the image? \nChoices:\n(A) Benign \n(B) Malignant \n(C) Uncertain \n(D) Cannot be determined " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.6875, "loss": 4.1233062744140625 }, { "id": "identity_132002", "image": "Super-CLEVR/images/superCLEVR_new_001845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a vehicle that matches the color of the scooter?" }, { "from": "gpt", "value": "The answer is Yes, the airplane" } ], "clip_score": 23.46875, "loss": 3.5446903705596924 }, { "id": "identity_320437", "image": "MapQA/images/map_11045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value in Virginia the lowest in the South?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.1875, "loss": 2.989927291870117 }, { "id": "identity_17577", "image": "UniGeo/images/724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,AD为⊙O的直径,交BC于点E,若DE=2,OE=3,则tanC•tanB=()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.125, "loss": 2.5046675205230713 }, { "id": "identity_73603", "image": "ScienceQA/images/3304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern observed on the pads of the animal's foot?" }, { "from": "gpt", "value": "The answer is Parallel ridges" } ], "clip_score": 28.046875, "loss": 3.9071598052978516 }, { "id": "identity_269057", "image": "FigureQA/images/83605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Saddle Brown and Firebrick intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.96875, "loss": 3.4671471118927 }, { "id": "identity_72805", "image": "AI2D/images/1352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is the Earth rotating around the Sun?" }, { "from": "gpt", "value": "The answer is Counterclockwise" } ], "clip_score": 32.21875, "loss": 1.9089405536651611 }, { "id": "identity_19468", "image": "VQA2.0/images/270722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this bear sleeping?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.5625, "loss": 3.4908599853515625 }, { "id": "identity_35129", "image": "A-OKVQA/images/501571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What setting do these mounted persons stand in?\nChoices:\n(A) park\n(B) riot\n(C) mall\n(D) grocery store" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.1875, "loss": 4.287363529205322 }, { "id": "identity_160205", "image": "GeoQA+/images/1436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AC is 6 units and angle ACD is 30 degrees, what is the length of CD?" }, { "from": "gpt", "value": "The answer is 3√3 units" } ], "clip_score": 30.609375, "loss": 1.6421674489974976 }, { "id": "identity_146605", "image": "MapQA/images/map_6674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the second-highest percentage range of pickers?" }, { "from": "gpt", "value": "The answer is the name of the state or states" } ], "clip_score": 37.75, "loss": 3.1827878952026367 }, { "id": "identity_181233", "image": "CLEVR-Math/images/CLEVR_train_067382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither metallic nor cubes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.546875, "loss": 2.542943239212036 }, { "id": "identity_200377", "image": "PMC-VQA/images/PMC8399726_Fig4_46470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.625, "loss": 2.323068380355835 }, { "id": "identity_63987", "image": "ScienceQA/images/8207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is located directly to the south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Pennsylvania" } ], "clip_score": 31.375, "loss": 3.4456875324249268 }, { "id": "identity_197696", "image": "PMC-VQA/images/PMC6976649_Fig4_104392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.15625, "loss": 3.2067089080810547 }, { "id": "identity_103531", "image": "TextVQA/images/db3bdd5768bc19f4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many numerical hour markers are displayed on the watch face?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 25.5625, "loss": 1.8404654264450073 }, { "id": "identity_179542", "image": "IconQA/images/15615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two penguins were to leave, how many penguins would remain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.921875, "loss": 2.151134967803955 }, { "id": "identity_92508", "image": "TabMWP/images/10369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did Alec spend the most time on homework?" }, { "from": "gpt", "value": "The answer is Thursday" } ], "clip_score": 39.40625, "loss": 1.9277828931808472 }, { "id": "identity_149377", "image": "DocVQA/images/yljd0227_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of admissions from 1971 to 1975 as shown in the graph?" }, { "from": "gpt", "value": "The answer is Increasing then slight decrease" } ], "clip_score": 31.703125, "loss": 4.098626613616943 }, { "id": "identity_265852", "image": "CLEVR-Math/images/CLEVR_train_002909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 yellow balls, how many yellow balls remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.765625, "loss": 2.368574380874634 }, { "id": "identity_70059", "image": "TQA/images/question_images/seasons_1045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle of tilt of the Earth's axis as shown in the image?" }, { "from": "gpt", "value": "The answer is 23.5 degrees" } ], "clip_score": 33.8125, "loss": 1.113524317741394 }, { "id": "identity_281087", "image": "DocVQA/images/gldj0038_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the current date?" }, { "from": "gpt", "value": "The answer is February 12, 1999" } ], "clip_score": 18.765625, "loss": 1.9624767303466797 }, { "id": "identity_243904", "image": "CLEVR-Math/images/CLEVR_train_032220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total objects are there if you add 1 blue matte sphere to the existing objects in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.0625, "loss": 2.035447120666504 }, { "id": "identity_310346", "image": "DocVQA/images/pxbm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which date was flight NW 3 from Tokyo to Osaka?" }, { "from": "gpt", "value": "The answer is 8/2" } ], "clip_score": 22.171875, "loss": 2.632197618484497 }, { "id": "identity_266426", "image": "TextVQA/images/03235d26a98bcb0f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand does this motorcycle belong to?" }, { "from": "gpt", "value": "The answer is fc" } ], "clip_score": 26.5, "loss": 4.724361419677734 }, { "id": "identity_193964", "image": "PMC-VQA/images/PMC5992769_Fig1_326031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the outlined area?" }, { "from": "gpt", "value": "The answer is Irregular oval" } ], "clip_score": 16.09375, "loss": 4.346514701843262 }, { "id": "identity_316702", "image": "CLEVR-Math/images/CLEVR_train_001952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 3 large rubber items. How many large rubber items are left?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 23.9375, "loss": 2.2364377975463867 }, { "id": "identity_49860", "image": "MapQA/images/map_10639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the number of colleges in the state of Texas?" }, { "from": "gpt", "value": "The answer is a range of numbers" } ], "clip_score": 29.578125, "loss": 3.38106107711792 }, { "id": "identity_91541", "image": "GeoQA+/images/1277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: With AB parallel to CD and angle 1 measuring 60 degrees, what is the sum of the measures of angle 2 and angle 3?" }, { "from": "gpt", "value": "The answer is 180 degrees" } ], "clip_score": 31.5625, "loss": 1.4073383808135986 }, { "id": "identity_14603", "image": "PMC-VQA/images/PMC6598368_Fig1_1001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the location of the lesion with ring enhancement? \nChoices:\n(A) Cerebellum \n(B) Brain stem \n(C) Left hemisphere \n(D) Right hemisphere. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.90625, "loss": 3.8826191425323486 }, { "id": "identity_127039", "image": "A-OKVQA/images/391011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tallest building visible in the background?" }, { "from": "gpt", "value": "The answer is Frost Bank Tower" } ], "clip_score": 15.25, "loss": 2.215529203414917 }, { "id": "identity_157176", "image": "ScienceQA/images/1887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the northeast of the highlighted state?" }, { "from": "gpt", "value": "The answer is Lake Erie" } ], "clip_score": 23.890625, "loss": 2.5914857387542725 }, { "id": "identity_167219", "image": "IconQA/images/49412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hearts are in the top row?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.328125, "loss": 2.2475526332855225 }, { "id": "identity_42532", "image": "FigureQA/images/20012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of the bar labeled Dark Olive greater than the value of the bar labeled Teal?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.34375, "loss": 3.438772678375244 }, { "id": "identity_190209", "image": "VizWiz/images/VizWiz_train_00018878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object's surface texture?" }, { "from": "gpt", "value": "The answer is Matte" } ], "clip_score": 19.4375, "loss": 3.126450300216675 }, { "id": "identity_188449", "image": "A-OKVQA/images/555065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the image?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 22.09375, "loss": 2.2103168964385986 }, { "id": "identity_57448", "image": "MapQA/images/map_9511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Texas?" }, { "from": "gpt", "value": "The answer is 41.5%-46.8%" } ], "clip_score": 21.765625, "loss": 2.2949657440185547 }, { "id": "identity_24582", "image": "VQA2.0/images/366994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there two or more double decker buses on the street?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 29.703125, "loss": 3.728910446166992 }, { "id": "identity_121341", "image": "VQA2.0/images/134176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of ground surface is shown in the image?" }, { "from": "gpt", "value": "The answer is Dirt or soil" } ], "clip_score": 13.1796875, "loss": 3.4981443881988525 }, { "id": "identity_98227", "image": "IconQA/images/14319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many houses have a chimney?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.890625, "loss": 2.478914976119995 }, { "id": "identity_106238", "image": "TextVQA/images/c6756309965f4533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary material used in the furniture displayed?" }, { "from": "gpt", "value": "The answer is Wood" } ], "clip_score": 23.53125, "loss": 3.810009479522705 }, { "id": "identity_286713", "image": "TabMWP/images/28927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.0, "loss": 4.495229244232178 }, { "id": "identity_176298", "image": "CLEVR-Math/images/CLEVR_train_060510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract 2 yellow objects. How many objects are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.109375, "loss": 2.1069276332855225 }, { "id": "identity_81841", "image": "UniGeo/images/2624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given rectangle ABCD, if the length of diagonal AC is 5 units, what is the length of diagonal BD?" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 29.75, "loss": 1.8252266645431519 }, { "id": "identity_112696", "image": "VQA-AS/images/9902.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are visible in the image?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 19.546875, "loss": 2.366971492767334 }, { "id": "identity_94943", "image": "IconQA/images/14795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does this shape have in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.078125, "loss": 2.2007133960723877 }, { "id": "identity_18635", "image": "GeoQA+/images/5468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 边长为10cm的正方形ABCD绕对角线的交点O旋转到得到正方形OA′B′C′,如图所示,则阴影部分面积为()\nChoices:\n(A) 100cm²\n(B) 75cm²\n(C) 50cm²\n(D) 25cm²" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.78125, "loss": 2.2096619606018066 }, { "id": "identity_222727", "image": "A-OKVQA/images/37905.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are visible in the sky in this image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.65625, "loss": 4.061367511749268 }, { "id": "identity_167623", "image": "TabMWP/images/38165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much less time did Cody spend on homework on Wednesday compared to Sunday?" }, { "from": "gpt", "value": "The answer is 4 minutes" } ], "clip_score": 36.34375, "loss": 2.1391804218292236 }, { "id": "identity_224267", "image": "A-OKVQA/images/389401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two objects that are being secured by the black object, and why is it important to secure them in this manner?" }, { "from": "gpt", "value": "The answer is Bike and pole, to prevent theft" } ], "clip_score": 25.296875, "loss": 3.3317925930023193 }, { "id": "identity_21332", "image": "DVQA/images/bar_train_00176209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain stacked bars?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 27.671875, "loss": 4.238978385925293 }, { "id": "identity_157741", "image": "TQA/images/question_images/hydrocarbons_8090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the molecular formula for ethane?" }, { "from": "gpt", "value": "The answer is C2H6" } ], "clip_score": 30.390625, "loss": 1.15425443649292 }, { "id": "identity_295249", "image": "AI2D/images/1878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify two herbivores from the provided food web:\n(A) coyote, bobcat\n(B) dingo, jack rabbit\n(C) dingo, bobcat\n(D) roadrunner&jack rabbit" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.765625, "loss": 2.917372226715088 }, { "id": "identity_261405", "image": "PMC-VQA/images/PMC8769554_F3_167335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which technique was employed to capture the images?\nChoices:\n(A) Scanning electron microscopy\n(B) Transmission electron microscopy\n(C) Fluorescence microscopy\n(D) X-ray microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.8125, "loss": 4.088863372802734 }, { "id": "identity_22658", "image": "PMC-VQA/images/PMC6269989_molecules-18-15474-f007_405076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow in the image represent? \nChoices:\n(A) Empty space in cortical bone \n(B) Thicker trabecular bone \n(C) Thinner trabecular bone \n(D) Trabecular bone with normal thickness " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.96875, "loss": 5.182900905609131 }, { "id": "identity_245343", "image": "IconQA/images/77449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of the objects from first to fifth?" }, { "from": "gpt", "value": "The answer is moon, rocket, star, rocket, moon" } ], "clip_score": 21.390625, "loss": 1.3511073589324951 }, { "id": "identity_118767", "image": "TextVQA/images/9a8f76f1817e2011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the width of the blue object?" }, { "from": "gpt", "value": "The answer is 3.9 inches" } ], "clip_score": 26.234375, "loss": 2.196993112564087 }, { "id": "identity_124939", "image": "PMC-VQA/images/PMC7435648_molecules-25-03324-f004_236956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely field of study related to this image?" }, { "from": "gpt", "value": "The answer is Microbiology or cellular biology" } ], "clip_score": 27.03125, "loss": 2.1337332725524902 }, { "id": "identity_293907", "image": "GeoQA+/images/3412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, the right angle vertex of the triangle board lies on one side of the rectangular paper. If ∠2 = 50°, then ∠1 = ?\nChoices:\n(A) 35°\n(B) 40°\n(C) 45°\n(D) 50°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.390625, "loss": 2.8646509647369385 }, { "id": "identity_68378", "image": "ScienceQA/images/8519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 27.90625, "loss": 2.9425978660583496 }, { "id": "identity_315662", "image": "GeoQA+/images/257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙○的直径,CD是⊙○的弦.若∠BAD=21°,则∠ACD的大小是多少?\nChoices:\n(A) 21°\n(B) 59°\n(C) 69°\n(D) 79°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.609375, "loss": 2.646803855895996 }, { "id": "identity_303383", "image": "Geometry3K/images/1017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, Q R \\parallel T S, Q T \\parallel R S, and m \\angle 1 = 131. Determine the measure of \\angle 8.\nChoices:\n(A) 101\n(B) 111\n(C) 121\n(D) 131" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.65625, "loss": 2.1563422679901123 }, { "id": "identity_209906", "image": "VQA2.0/images/182436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras are in motion?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.984375, "loss": 2.52301025390625 }, { "id": "identity_337929", "image": "VQA2.0/images/426663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are any of the individuals wearing a tie clip?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 8.203125, "loss": 3.184936046600342 }, { "id": "identity_156496", "image": "ScienceQA/images/15200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow particles are present in Solution A?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.15625, "loss": 2.3940658569335938 }, { "id": "identity_150240", "image": "DocVQA/images/mmlk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the conference?" }, { "from": "gpt", "value": "The answer is SUGAR'S BOUNTY; IN RETROSPECT AND BLUEPRINTS FOR THE FUTURE" } ], "clip_score": 26.46875, "loss": 0.8559724688529968 }, { "id": "identity_284559", "image": "IconQA/images/62498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the line in centimeters when measured to the nearest centimeter using the ruler?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.703125, "loss": 2.9615328311920166 }, { "id": "identity_87684", "image": "UniGeo/images/203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if angle ABC is 45 degrees and AC is 8 cm, what is the length of AD, the altitude from A to BC?" }, { "from": "gpt", "value": "The answer is 4 cm" } ], "clip_score": 33.0625, "loss": 1.9084340333938599 }, { "id": "identity_98732", "image": "TabMWP/images/34502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the honors graduates received Magna cum laude?" }, { "from": "gpt", "value": "The answer is 22.81%" } ], "clip_score": 31.046875, "loss": 2.0146920680999756 }, { "id": "identity_175947", "image": "TabMWP/images/23510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a randomly selected balloon animal is green?" }, { "from": "gpt", "value": "The answer is 11/17" } ], "clip_score": 19.0, "loss": 2.0008301734924316 }, { "id": "identity_134973", "image": "FigureQA/images/32678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the largest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Dark Salmon" } ], "clip_score": 37.6875, "loss": 2.567739725112915 }, { "id": "identity_125935", "image": "TextVQA/images/3035cc955ef57fd8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the ION USB TURNTABLE?" }, { "from": "gpt", "value": "The answer is £79" } ], "clip_score": 21.390625, "loss": 3.8074047565460205 }, { "id": "identity_204701", "image": "A-OKVQA/images/546877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the boat in the middle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.625, "loss": 3.3096230030059814 }, { "id": "identity_331682", "image": "A-OKVQA/images/28480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal is the same color as this?\nChoices:\n(A) panther\n(B) leopard\n(C) swan\n(D) giraffe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.93359375, "loss": 3.951343536376953 }, { "id": "identity_74910", "image": "ScienceQA/images/4806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the south?" }, { "from": "gpt", "value": "The answer is North Carolina" } ], "clip_score": 31.1875, "loss": 2.8671302795410156 }, { "id": "identity_181302", "image": "IconQA/images/33656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock is set to beep every 15 minutes, how many times will it beep in one hour?" }, { "from": "gpt", "value": "The answer is 4 times" } ], "clip_score": 30.6875, "loss": 2.0066726207733154 }, { "id": "identity_170057", "image": "TabMWP/images/8950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Hunter's earnings from his job as a guitar instructor went towards income tax and other payroll taxes?" }, { "from": "gpt", "value": "The answer is 18.2%" } ], "clip_score": 40.40625, "loss": 1.4705110788345337 }, { "id": "identity_106650", "image": "Super-CLEVR/images/superCLEVR_new_000322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the motorcycle have more than two wheels?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.8671875, "loss": 3.3764560222625732 }, { "id": "identity_58647", "image": "FigureQA/images/90045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the y-axis value for Dark Blue at x-axis value 50?" }, { "from": "gpt", "value": "The answer is 86" } ], "clip_score": 30.5, "loss": 2.7828433513641357 }, { "id": "identity_117078", "image": "A-OKVQA/images/360747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the truck?" }, { "from": "gpt", "value": "The answer is A horse" } ], "clip_score": 22.890625, "loss": 5.002538681030273 }, { "id": "identity_324563", "image": "AI2D/images/1825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, which organism is identified as the producer?\nChoices:\n(A) hawk\n(B) snake\n(C) grass\n(D) mouse" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.015625, "loss": 3.818965196609497 }, { "id": "identity_321970", "image": "A-OKVQA/images/376677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of place is the area on the left with sign displays most likely to be?\nChoices:\n(A) shopping mall\n(B) strip mall\n(C) market\n(D) rest stop" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.7109375, "loss": 4.423489570617676 }, { "id": "identity_216871", "image": "VQA2.0/images/329343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a plant in the room, and if so, where is it located?" }, { "from": "gpt", "value": "The answer is Yes, near the window" } ], "clip_score": 20.5, "loss": 3.418470621109009 }, { "id": "identity_93256", "image": "IconQA/images/48187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shape is not colored?" }, { "from": "gpt", "value": "The answer is 3/5" } ], "clip_score": 29.828125, "loss": 1.8589942455291748 }, { "id": "identity_191634", "image": "PMC-VQA/images/PMC4592368_Fig1_429401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible abnormality in the brain structure?" }, { "from": "gpt", "value": "The answer is No visible abnormality" } ], "clip_score": 30.9375, "loss": 2.919398546218872 }, { "id": "identity_309350", "image": "UniGeo/images/392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的弦,AB=5,点C是⊙O上的一个动点,且∠ACB=45°,若点M、N分别是AB、AC的中点,则MN长的最大值是多少?\nChoices:\n(A) \\frac{5√{2}}{2}\n(B) 5√{2}\n(C) \\frac{5}{2}\n(D) 3" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.921875, "loss": 2.266295909881592 }, { "id": "identity_124753", "image": "PMC-VQA/images/PMC8750680_cancers-14-00074-f001_162337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the abnormality indicated by the arrow appear to be solid or cystic?" }, { "from": "gpt", "value": "The answer is Solid" } ], "clip_score": 26.671875, "loss": 3.04862904548645 }, { "id": "identity_173242", "image": "IconQA/images/55339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If this clock loses 5 minutes every 30 minutes, what will the time be after 3 hours?" }, { "from": "gpt", "value": "The answer is 5:30 PM" } ], "clip_score": 32.875, "loss": 2.0105233192443848 }, { "id": "identity_319463", "image": "DVQA/images/bar_train_00127126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart shown in a percentage scale?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.21875, "loss": 3.6104063987731934 }, { "id": "identity_231313", "image": "ScienceQA/images/11516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How do the physical properties of these three objects contribute to their common functionality, and what is that functionality?" }, { "from": "gpt", "value": "The answer is They all utilize elasticity to bounce" } ], "clip_score": 15.3984375, "loss": 3.255610942840576 }, { "id": "identity_297596", "image": "A-OKVQA/images/286234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During her practice session, the tennis player is enclosed by nets because she is playing against whom?\nChoices:\n(A) nobody\n(B) audience\n(C) player\n(D) machine" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.125, "loss": 4.2153120040893555 }, { "id": "identity_222796", "image": "VQA2.0/images/282843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the surfboard of the surfer riding the wave?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.890625, "loss": 2.950019359588623 }, { "id": "identity_108348", "image": "PMC-VQA/images/PMC7688799_Fig2_304939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What medical procedure is being performed in the image?" }, { "from": "gpt", "value": "The answer is Endoscopy" } ], "clip_score": 28.125, "loss": 2.369745969772339 }, { "id": "identity_176398", "image": "TabMWP/images/1696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the percentage increase in population density per square mile for Nevada from 1990 to 2000?" }, { "from": "gpt", "value": "The answer is 80%" } ], "clip_score": 32.90625, "loss": 1.8935956954956055 }, { "id": "identity_140049", "image": "DVQA/images/bar_train_00015144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between 'career' and 'mass'?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.046875, "loss": 2.594461679458618 }, { "id": "identity_53346", "image": "ChartQA/images/multi_col_80478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the amount of debt in 2019 US dollars for all bachelor's degree recipients in the year 2013/14?" }, { "from": "gpt", "value": "The answer is 16,300" } ], "clip_score": 30.828125, "loss": 2.140087604522705 }, { "id": "identity_47069", "image": "FigureQA/images/57294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.9375, "loss": 2.441098928451538 }, { "id": "identity_100796", "image": "IconQA/images/29783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the line in millimeters?" }, { "from": "gpt", "value": "The answer is 60 mm" } ], "clip_score": 31.234375, "loss": 2.1990556716918945 }, { "id": "identity_258774", "image": "PlotQA/images/21584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average annual growth rate of internet users (% of population) from 1994 to 1998?" }, { "from": "gpt", "value": "The answer is 0.75%" } ], "clip_score": 23.625, "loss": 1.6964194774627686 }, { "id": "identity_179617", "image": "IconQA/images/74400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two pencils are added to the image, what will be the new ratio of pencils to pieces of paper?" }, { "from": "gpt", "value": "The answer is 3:3 or 1:1" } ], "clip_score": 33.65625, "loss": 1.6073591709136963 }, { "id": "identity_179751", "image": "TabMWP/images/28701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of miles driven by Mitch from Thursday to Wednesday?" }, { "from": "gpt", "value": "The answer is 645 miles" } ], "clip_score": 35.125, "loss": 2.2312746047973633 }, { "id": "identity_125288", "image": "A-OKVQA/images/438219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the train on the left side of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.21875, "loss": 3.16084623336792 }, { "id": "identity_181678", "image": "IconQA/images/5207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many palm trees are there in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.96875, "loss": 2.3323936462402344 }, { "id": "identity_120129", "image": "VQA-AS/images/9817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals can be seen in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.546875, "loss": 2.3017845153808594 }, { "id": "identity_73666", "image": "AI2D/images/1668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the partially shaded outer region of the shadow cast by the Earth or Moon?" }, { "from": "gpt", "value": "The answer is penumbra" } ], "clip_score": 28.765625, "loss": 2.603841543197632 }, { "id": "identity_249468", "image": "PMC-VQA/images/PMC7410186_F3_229782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the type of roots shown in the image and explain the characteristics that differentiate them from other types of roots" }, { "from": "gpt", "value": "The answer is Primary roots" } ], "clip_score": 30.828125, "loss": 5.877443790435791 }, { "id": "identity_296414", "image": "PMC-VQA/images/PMC9354698_f2_364694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI image is displayed in Panel C among the following choices: T1+C, T1, T2, or FLAIR?\nChoices:\n(A) T1+C \n(B) T1 \n(C) T2 \n(D) FLAIR" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.46875, "loss": 4.16327428817749 }, { "id": "identity_263639", "image": "PlotQA/images/26905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On the top tick, how many bars are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 14.6796875, "loss": 3.063727617263794 }, { "id": "identity_280418", "image": "AI2D/images/1868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the primary source of energy for deer according to the food web diagram provided?\nChoices:\n(A) owl\n(B) fox\n(C) none\n(D) plants" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.53125, "loss": 4.221086502075195 }, { "id": "identity_168960", "image": "TabMWP/images/37227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Jeffrey wants to buy the cheapest book, which one should he choose?" }, { "from": "gpt", "value": "The answer is The book about baseball" } ], "clip_score": 31.09375, "loss": 3.867159843444824 }, { "id": "identity_234765", "image": "Geometry3K/images/580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the trapezoids are similar, find the value of \\( y \\) and then calculate the perimeter of trapezoid \\( WXYZ \\)" }, { "from": "gpt", "value": "The answer is 26.5" } ], "clip_score": 36.28125, "loss": 2.727189779281616 }, { "id": "identity_293343", "image": "DVQA/images/bar_train_00024517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all categories, which object is the most preferred by the highest number of people?" }, { "from": "gpt", "value": "The answer is golf" } ], "clip_score": 29.5, "loss": 3.0561628341674805 }, { "id": "identity_22135", "image": "GeoQA+/images/1623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD与四边形EFGH位似,位似中心是O,若OA:OE=0.333333,且四边形ABCD的周长为4,则四边形EFGH的周长为()\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) 20" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.640625, "loss": 2.474538803100586 }, { "id": "identity_205343", "image": "Super-CLEVR/images/superCLEVR_new_001217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vehicle that is the same color as the double bus?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.375, "loss": 3.3691298961639404 }, { "id": "identity_16221", "image": "FigureQA/images/28513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.78125, "loss": 4.517612457275391 }, { "id": "identity_145988", "image": "MapQA/images/map_4776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the highest value range for the number of thanks?" }, { "from": "gpt", "value": "The answer is New York" } ], "clip_score": 32.71875, "loss": 2.189802408218384 }, { "id": "identity_55822", "image": "FigureQA/images/42731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the smallest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Dark Salmon" } ], "clip_score": 37.53125, "loss": 3.013587474822998 }, { "id": "identity_43062", "image": "MapQA/images/map_3750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of Texas?" }, { "from": "gpt", "value": "The answer is 5200-5400" } ], "clip_score": 22.65625, "loss": 1.842500925064087 }, { "id": "identity_98640", "image": "IconQA/images/18214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shapes are moons?" }, { "from": "gpt", "value": "The answer is 1/3" } ], "clip_score": 25.34375, "loss": 1.3653292655944824 }, { "id": "identity_11", "image": "DocVQA/images/flmh0077_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which Company's invite is this?" }, { "from": "gpt", "value": "The answer is R.J. Reynolds Tobacco Company" } ], "clip_score": 23.953125, "loss": 1.400687575340271 }, { "id": "identity_293902", "image": "AI2D/images/1013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the tube depicted in the image?\nChoices:\n(A) Floral\n(B) Style\n(C) None of the above\n(D) Beard" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.21875, "loss": 4.0958571434021 }, { "id": "identity_165272", "image": "GeoQA+/images/2850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O, and point A is the midpoint of EC, what is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 63 degrees" } ], "clip_score": 35.59375, "loss": 2.585932731628418 }, { "id": "identity_29463", "image": "TabMWP/images/9390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a quilt cost than a pillow?" }, { "from": "gpt", "value": "The answer is 13.47" } ], "clip_score": 29.875, "loss": 2.3188815116882324 }, { "id": "identity_288831", "image": "VQA2.0/images/334588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of headlights that are currently visible?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 9.984375, "loss": 2.3749701976776123 }, { "id": "identity_52611", "image": "DocVQA/images/ynjf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the production date of this document?" }, { "from": "gpt", "value": "The answer is Produced in MDL on 02/27/13" } ], "clip_score": 16.75, "loss": 3.046736478805542 }, { "id": "identity_39469", "image": "FigureQA/images/52866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Dark Green section larger than the Violet Red section?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 36.90625, "loss": 3.3478026390075684 }, { "id": "identity_306210", "image": "UniGeo/images/2913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,PA为⊙O的切线,A为切点,PO交⊙O于点B,OA=3,OP=6,那么∠BAP的度数是()\n选项:\n(A) 30°\n(B) 45°\n(C) 60°\n(D) 35°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.953125, "loss": 2.764195680618286 }, { "id": "identity_21558", "image": "A-OKVQA/images/179153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the woman in the white jacket wearing a helmet?\nChoices:\n(A) dress code\n(B) for fun\n(C) style\n(D) protection" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.90625, "loss": 4.0353007316589355 }, { "id": "identity_47785", "image": "DocVQA/images/mzbx0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Executive Officer of the Institution?" }, { "from": "gpt", "value": "The answer is Philip J. Keohan" } ], "clip_score": 21.328125, "loss": 3.3606090545654297 }, { "id": "identity_80284", "image": "UniGeo/images/2510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is 15°" } ], "clip_score": 30.453125, "loss": 2.7343075275421143 }, { "id": "identity_152161", "image": "ScienceQA/images/697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stars are visible on the cup?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.09375, "loss": 2.2770700454711914 }, { "id": "identity_5878", "image": "DocVQA/images/yqdv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number mentioned in the document?" }, { "from": "gpt", "value": "The answer is 754-8881" } ], "clip_score": 22.359375, "loss": 2.5582966804504395 }, { "id": "identity_302942", "image": "TQA/images/abc_question_images/parts_worm_17313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates this structure?\nChoices:\n(A) T\n(B) U\n(C) H\n(D) V" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.59375, "loss": 4.417394638061523 }, { "id": "identity_298517", "image": "GeoQA+/images/4355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, line segment AB is the diameter of circle O, chord CD is perpendicular to AB. If ∠BOC = 80°, what is the measure of ∠BAD?\nChoices:\n(A) 10°\n(B) 40°\n(C) 45°\n(D) 80°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 38.90625, "loss": 2.2597289085388184 }, { "id": "identity_3528", "image": "AI2D/images/1504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what temperature does water freeze in Celsius?\nChoices:\n(A) 0\n(B) -17\n(C) 100\n(D) 144" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.015625, "loss": 3.651942491531372 }, { "id": "identity_159416", "image": "Geometry3K/images/1462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 46.40 degrees" } ], "clip_score": 32.6875, "loss": 2.5434954166412354 }, { "id": "identity_227626", "image": "DocVQA/images/rlvm0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of the project in days, and what is the total grant amount per day?" }, { "from": "gpt", "value": "The answer is 365 days, $186.08 per day" } ], "clip_score": 20.828125, "loss": 2.095874547958374 }, { "id": "identity_313993", "image": "PMC-VQA/images/PMC3915685_f1-mco-01-05-0851_264807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cerebral artery is blocked in this MRI?\nChoices:\n(A) Anterior cerebral artery \n(B) Posterior cerebral artery \n(C) Middle cerebral artery \n(D) Basilar artery" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.796875, "loss": 3.64916729927063 }, { "id": "identity_166664", "image": "IconQA/images/51290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the frame is filled with shapes?" }, { "from": "gpt", "value": "The answer is 1/10" } ], "clip_score": 25.484375, "loss": 1.4965711832046509 }, { "id": "identity_316454", "image": "TextVQA/images/050c0066b1ed14bd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this?" }, { "from": "gpt", "value": "The answer is merlot" } ], "clip_score": 31.28125, "loss": 3.3517959117889404 }, { "id": "identity_250202", "image": "GeoQA+/images/2129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC与△DEF是位似图形,相似比为2:3,已知AB=3,BC=4,则EF的长为多少?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.21875, "loss": 2.2997524738311768 }, { "id": "identity_261180", "image": "PMC-VQA/images/PMC7868013_Fig2_362950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How would you calculate the center-edge angle if the vertical line is drawn from the center of the femoral head and the lateral acetabular wall margin is 30 degrees from this vertical line?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 25.703125, "loss": 1.8154959678649902 }, { "id": "identity_157583", "image": "TQA/images/question_images/sun_layers_4062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the outermost layer of the sun as labeled in the image?" }, { "from": "gpt", "value": "The answer is Photosphere" } ], "clip_score": 32.5625, "loss": 2.2423059940338135 }, { "id": "identity_263829", "image": "PMC-VQA/images/PMC7959875_FIG2_391530.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the computed tomography image? Is it:\n(A) Lungs\n(B) Abdomen\n(C) Heart\n(D) Kidneys" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.9375, "loss": 5.194989204406738 }, { "id": "identity_324531", "image": "A-OKVQA/images/524470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person here using to get around?\nChoices:\n(A) scooter\n(B) skateboard\n(C) car\n(D) bike" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.8671875, "loss": 4.044836044311523 }, { "id": "identity_62826", "image": "ScienceQA/images/10517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the object labeled as track suit?" }, { "from": "gpt", "value": "The answer is Athletic wear or sportswear" } ], "clip_score": 33.5, "loss": 2.5312881469726562 }, { "id": "identity_117192", "image": "A-OKVQA/images/100726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is in the room?" }, { "from": "gpt", "value": "The answer is Carpet" } ], "clip_score": 17.609375, "loss": 3.2973673343658447 }, { "id": "identity_166870", "image": "IconQA/images/67969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Maria started washing her dog at 9:00 AM, how long has she been washing her dog?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 21.421875, "loss": 1.741795301437378 }, { "id": "identity_250185", "image": "PMC-VQA/images/PMC6940314_Fig3_93321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences in imaging characteristics between a DWI scan and a 68Ga-PSMA-PET scan, and how do these differences help in identifying the fusion of these two imaging modalities in patient 2?" }, { "from": "gpt", "value": "The answer is Fusion imaging combines anatomical and functional details for comprehensive analysis" } ], "clip_score": 30.921875, "loss": 2.8500704765319824 }, { "id": "identity_63088", "image": "ScienceQA/images/14328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If these two magnets were free to move, what would be their likely motion due to magnetic force?" }, { "from": "gpt", "value": "The answer is They would move apart" } ], "clip_score": 21.25, "loss": 3.8715665340423584 }, { "id": "identity_267710", "image": "ScienceQA/images/4401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likelihood that a koi fish resulting from this cross will have black eyes?\nChoices:\n(A) 3/4\n(B) 0/4\n(C) 1/4\n(D) 2/4\n(E) 4/4" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.390625, "loss": 3.299328327178955 }, { "id": "identity_169648", "image": "TabMWP/images/27826.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of tractors made per month over the four months?" }, { "from": "gpt", "value": "The answer is 967.5" } ], "clip_score": 34.40625, "loss": 1.8686288595199585 }, { "id": "identity_255119", "image": "ScienceQA/images/8239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony shown on the map is located directly south of Massachusetts and is known for being the smallest U.S. state by area?" }, { "from": "gpt", "value": "The answer is Rhode Island" } ], "clip_score": 31.734375, "loss": 1.8107341527938843 }, { "id": "identity_276969", "image": "VQA-RAD/images/synpic47974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do the hyperdensities on the periphery of the image represent?" }, { "from": "gpt", "value": "The answer is Ribs" } ], "clip_score": 22.84375, "loss": 2.9694392681121826 }, { "id": "identity_108259", "image": "PMC-VQA/images/PMC6582478_Fig4_495892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we identify any signs of edema in the brain tissue?" }, { "from": "gpt", "value": "The answer is Yes, hyperintense area in the right frontal lobe suggests edema" } ], "clip_score": 33.5, "loss": 2.3266868591308594 }, { "id": "identity_66858", "image": "ScienceQA/images/7558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one magnet is flipped, what interaction will occur between the magnets?" }, { "from": "gpt", "value": "The answer is The magnets will attract each other" } ], "clip_score": 19.40625, "loss": 2.6377148628234863 }, { "id": "identity_46376", "image": "DVQA/images/bar_train_00068689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value for the category represented by the color green in the bar labeled 'child'?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 22.515625, "loss": 2.3137717247009277 }, { "id": "identity_326187", "image": "GeoQA+/images/1267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知两条平行线AB和CD,直线l与它们分别相交于点E和F,将l绕点E逆时针旋转40°后,与AB相交于点G,若∠GEC=80°,那么∠GFE=()\n\nChoices:\n(A) 60°\n(B) 50°\n(C) 40°\n(D) 30°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.734375, "loss": 2.517167329788208 }, { "id": "identity_148500", "image": "DocVQA/images/lghx0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the letter addressed to?" }, { "from": "gpt", "value": "The answer is Mr. Francis A. Kleinhenz, President of University of New Mexico" } ], "clip_score": 24.75, "loss": 2.576639175415039 }, { "id": "identity_182513", "image": "VQA2.0/images/215819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of aircraft is shown in the image?" }, { "from": "gpt", "value": "The answer is A small, private, single-engine airplane" } ], "clip_score": 29.6875, "loss": 2.6059622764587402 }, { "id": "identity_190614", "image": "PMC-VQA/images/PMC5455666_materials-08-04287-f005_173123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the element symbol for the material shown in the SEM image?" }, { "from": "gpt", "value": "The answer is Fe" } ], "clip_score": 25.921875, "loss": 4.012229919433594 }, { "id": "identity_45396", "image": "FigureQA/images/70905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Light Salmon ever exceed a value of 60?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.0625, "loss": 2.9581005573272705 }, { "id": "identity_213692", "image": "TextVQA/images/26cb1858072bdfc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the presumed location of this scene according to the caption?" }, { "from": "gpt", "value": "The answer is Boston Rd. approaching 3rd Ave" } ], "clip_score": 28.359375, "loss": 1.754935383796692 }, { "id": "identity_196810", "image": "Super-CLEVR/images/superCLEVR_new_001986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that has the same color as the sky?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.0, "loss": 3.302366256713867 }, { "id": "identity_313480", "image": "A-OKVQA/images/453409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the tower behind the central festival decoration?\nChoices:\n(A) cellular service\n(B) radar\n(C) television\n(D) radio" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.671875, "loss": 4.647611618041992 }, { "id": "identity_61333", "image": "DVQA/images/bar_train_00151178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest percentage of people preferring 'wife'?" }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 29.046875, "loss": 3.4989209175109863 }, { "id": "identity_225664", "image": "DocVQA/images/kxxx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total amount of Personnel (Salaries) and Fringe Benefits in the current budget?" }, { "from": "gpt", "value": "The answer is 19,912.00" } ], "clip_score": 23.671875, "loss": 1.783762812614441 }, { "id": "identity_26104", "image": "ScienceQA/images/18642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Michigan?\nChoices:\n(A) Little Rock\n(B) Lansing\n(C) Grand Rapids\n(D) Springfield" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.192807197570801 }, { "id": "identity_80145", "image": "GeoQA+/images/5429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,F是△ABC的角平分线CD和BE的交点,若∠ACG=32°,则∠AFB的度数是()" }, { "from": "gpt", "value": "The answer is 59.5°" } ], "clip_score": 30.046875, "loss": 1.8295872211456299 }, { "id": "identity_269588", "image": "VizWiz/images/VizWiz_train_00004936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please tell me the flavor of this ice cream? Thank you." }, { "from": "gpt", "value": "The answer is blackberry cobbler" } ], "clip_score": 19.765625, "loss": 2.8739101886749268 }, { "id": "identity_91534", "image": "UniGeo/images/1186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of the circle with center O is 10 units, what is the length of the minor arc AC?" }, { "from": "gpt", "value": "The answer is approximately 8.73 units" } ], "clip_score": 32.0625, "loss": 2.0899271965026855 }, { "id": "identity_177576", "image": "IconQA/images/85513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 4 marbles from the fourth row, how many marbles will be left in that row?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.578125, "loss": 2.4633243083953857 }, { "id": "identity_26999", "image": "TabMWP/images/4888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A baseball player paid attention to how many baseballs he hit in practice each day. What is the median of the numbers?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 35.34375, "loss": 2.6839663982391357 }, { "id": "identity_174773", "image": "TabMWP/images/17464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long is the train scheduled to stop at Dayton?" }, { "from": "gpt", "value": "The answer is 15 minutes" } ], "clip_score": 33.59375, "loss": 1.9058809280395508 }, { "id": "identity_191753", "image": "PMC-VQA/images/PMC7681181_Fig14_302897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the patient's body is the abnormality located on?" }, { "from": "gpt", "value": "The answer is The patient's right side" } ], "clip_score": 28.125, "loss": 3.037572145462036 }, { "id": "identity_79743", "image": "Geometry3K/images/798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle PTS?" }, { "from": "gpt", "value": "The answer is 13.4" } ], "clip_score": 29.828125, "loss": 2.3605446815490723 }, { "id": "identity_334495", "image": "DocVQA/images/rpwx0225_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is this effective?" }, { "from": "gpt", "value": "The answer is 3-22-01" } ], "clip_score": 12.8125, "loss": 2.1652402877807617 }, { "id": "identity_204251", "image": "A-OKVQA/images/550529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of terrain is this bike designed for based on its tire tread pattern?" }, { "from": "gpt", "value": "The answer is Off-road or rough terrain" } ], "clip_score": 23.8125, "loss": 2.3711795806884766 }, { "id": "identity_15318", "image": "Super-CLEVR/images/superCLEVR_new_000337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number of airliners are big yellow shiny things or small green objects?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 22.734375, "loss": 1.9646238088607788 }, { "id": "identity_198647", "image": "TextVQA/images/0ab462028eb0810e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On the bottom shelf, what is the title of the book with a white and orange spine?" }, { "from": "gpt", "value": "The answer is The Little Brown Handbook" } ], "clip_score": 24.140625, "loss": 3.5118155479431152 }, { "id": "identity_170768", "image": "IconQA/images/869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is spun 60 times, how many times can we expect it to land on the white section?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 22.953125, "loss": 2.4252657890319824 }, { "id": "identity_280847", "image": "TQA/images/abc_question_images/human_system_eye_16146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can the choroid be found?\nChoices:\n(A) K\n(B) Y\n(C) S\n(D) X" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.65625, "loss": 4.049057483673096 }, { "id": "identity_242553", "image": "PMC-VQA/images/PMC4378709_fig6_373043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing imaging characteristics of a lipoma compared to other types of masses such as sarcoma, lymphoma, and osteoma, and how do these characteristics help in diagnosing the mass in the given image?" }, { "from": "gpt", "value": "The answer is Lipoma has well-circumscribed, homogeneous, hypointense characteristics on T1-weighted MRI" } ], "clip_score": 27.34375, "loss": 2.583327531814575 }, { "id": "identity_116119", "image": "VQA2.0/images/521867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the chair visible in the room?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.28125, "loss": 3.9955451488494873 }, { "id": "identity_192148", "image": "PMC-VQA/images/PMC7707631_IJMS-45-485-g001.tif_311710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of midline shift in this brain image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.828125, "loss": 2.7582077980041504 }, { "id": "identity_285741", "image": "A-OKVQA/images/267611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely action the dog will take with the frisbee shown in the image?\nChoices:\n(A) what frisbee\n(B) avoid it\n(C) catch it\n(D) nothing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.455776691436768 }, { "id": "identity_8011", "image": "TextVQA/images/783fae7f3b5248f2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the word after barely on the sign to the far left?" }, { "from": "gpt", "value": "The answer is legal" } ], "clip_score": 17.328125, "loss": 4.07627010345459 }, { "id": "identity_100696", "image": "TabMWP/images/20416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a recipe calls for 4 pounds of a mixture of American and ricotta cheese in a 1:1 ratio, how much would the required cheese cost?" }, { "from": "gpt", "value": "The answer is $28.02" } ], "clip_score": 34.59375, "loss": 2.126126766204834 }, { "id": "identity_300021", "image": "PMC-VQA/images/PMC6110783_Fig2_359575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the main subject of the experiment depicted in the image?\nChoices:\n(A) Analysis of AQP4 in the brain\n(B) Investigation of the blood-brain barrier in EAE mice\n(C) Examination of GFAP expression in astrocytes\n(D) Comparison of spinal cord structure in healthy mice and EAE mice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.1875, "loss": 4.207808971405029 }, { "id": "identity_250433", "image": "A-OKVQA/images/26448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the material used to depict the snow in this art piece and explain why it is not one of the other materials listed" }, { "from": "gpt", "value": "The answer is cotton" } ], "clip_score": 27.046875, "loss": 4.07871675491333 }, { "id": "identity_330582", "image": "GeoQA+/images/3963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图, l_{1}∥l_{2}∥l_{3}, BC=1, \\frac{DE}{EF}=\\frac{3}{2}, 那么 AB 的长度是()\n选项:\n(A) 4\n(B) 2\n(C) \\frac{3}{2}\n(D) \\frac{2}{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.15625, "loss": 2.6585097312927246 }, { "id": "identity_41552", "image": "FigureQA/images/97773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Light Green between x-axis 20 and 80?" }, { "from": "gpt", "value": "The answer is 75" } ], "clip_score": 24.734375, "loss": 2.5153255462646484 }, { "id": "identity_37690", "image": "IconQA/images/24889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the rectangle?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 33.59375, "loss": 2.8041622638702393 }, { "id": "identity_309115", "image": "A-OKVQA/images/320150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How could someone theoretically grab items here?\nChoices:\n(A) claw\n(B) betting number\n(C) pleading\n(D) blowing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.0, "loss": 3.809396743774414 }, { "id": "identity_26066", "image": "DocVQA/images/rgvh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the page number at top of the page?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.078125, "loss": 2.2444584369659424 }, { "id": "identity_32773", "image": "TQA/images/question_images/earth_magnetic_field_6783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the degree difference between the North Magnetic Pole and the Geographic North Pole?\nChoices:\n(A) 11.5 degrees\n(B) South Magnetic Pole\n(C) Geographic South Pole\n(D) 2670 degrees" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.28125, "loss": 3.7235374450683594 }, { "id": "identity_331218", "image": "DocVQA/images/jnyb0018_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the attendees for the breakfast event on Monday, May 8?" }, { "from": "gpt", "value": "The answer is jim helm" } ], "clip_score": 27.328125, "loss": 5.359821796417236 }, { "id": "identity_52170", "image": "FigureQA/images/97210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value achieved by Cornflower?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 19.125, "loss": 2.7178187370300293 }, { "id": "identity_94880", "image": "TabMWP/images/19785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students own exactly 2 jackets?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 32.25, "loss": 2.2563419342041016 }, { "id": "identity_250341", "image": "TabMWP/images/14161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average temperature recorded by Lola over the given period?" }, { "from": "gpt", "value": "The answer is 21.6" } ], "clip_score": 27.234375, "loss": 2.5619993209838867 }, { "id": "identity_141399", "image": "DVQA/images/bar_train_00018032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy value for the 'faint' algorithm across all datasets?" }, { "from": "gpt", "value": "The answer is Approximately 4.14" } ], "clip_score": 33.9375, "loss": 2.4226768016815186 }, { "id": "identity_215188", "image": "A-OKVQA/images/198437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of equestrian event does the setting suggest?" }, { "from": "gpt", "value": "The answer is Dressage or show jumping event" } ], "clip_score": 26.1875, "loss": 2.3480422496795654 }, { "id": "identity_22944", "image": "DocVQA/images/ttvw0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much is the Writer's Fee?" }, { "from": "gpt", "value": "The answer is $3,000" } ], "clip_score": 21.59375, "loss": 1.7198371887207031 }, { "id": "identity_207558", "image": "PMC-VQA/images/PMC5606599_vetsci-04-00020-f006_217799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the green waveform at the bottom of the image represent?" }, { "from": "gpt", "value": "The answer is Electrocardiogram (ECG or EKG)" } ], "clip_score": 26.9375, "loss": 1.4685983657836914 }, { "id": "identity_204306", "image": "Super-CLEVR/images/superCLEVR_new_001838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicles are depicted in the image?" }, { "from": "gpt", "value": "The answer is Scooters" } ], "clip_score": 27.171875, "loss": 2.923614501953125 }, { "id": "identity_279793", "image": "CLEVR-Math/images/CLEVR_train_051398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 3 rubber cylinders, how many rubber cylinders remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 18.625, "loss": 2.1435468196868896 }, { "id": "identity_105433", "image": "PMC-VQA/images/PMC3610264_F2_195365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter is associated with this image in the series?" }, { "from": "gpt", "value": "The answer is Letter F" } ], "clip_score": 24.15625, "loss": 3.2259347438812256 }, { "id": "identity_16625", "image": "TextVQA/images/4b9b84d9953c0d25.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is this trolley numbered?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 30.03125, "loss": 2.0079360008239746 }, { "id": "identity_337085", "image": "AI2D/images/1858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the given diagram, identify a sequence of organisms with three levels.\nChoices:\n(A) phytoplankton-krill-penguin\n(B) krill-phytoplankton-fish\n(C) fish-phytoplankton-krill\n(D) all are correct" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.03125, "loss": 2.989859104156494 }, { "id": "identity_154597", "image": "TQA/images/question_images/faults_1734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of stress is shown in the image acting on the fault?" }, { "from": "gpt", "value": "The answer is Compressional stress" } ], "clip_score": 35.6875, "loss": 2.813018321990967 }, { "id": "identity_129600", "image": "VQA2.0/images/94625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geometric shape is the roof of the station?" }, { "from": "gpt", "value": "The answer is Pyramid" } ], "clip_score": 21.90625, "loss": 2.8478105068206787 }, { "id": "identity_64664", "image": "AI2D/images/4204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the end product of weathering and erosion of rocks?" }, { "from": "gpt", "value": "The answer is sediments" } ], "clip_score": 18.84375, "loss": 3.602879285812378 }, { "id": "identity_206259", "image": "PMC-VQA/images/PMC4418099_Fig1_383036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the area where the hematoma is located compared to the surrounding brain tissue?" }, { "from": "gpt", "value": "The answer is Higher density" } ], "clip_score": 33.09375, "loss": 3.4692835807800293 }, { "id": "identity_275575", "image": "VQA-RAD/images/synpic52988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do patchy infiltrates exist?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.359375, "loss": 2.9477033615112305 }, { "id": "identity_104399", "image": "VizWiz/images/VizWiz_train_00014424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the shadow suggest it is?" }, { "from": "gpt", "value": "The answer is Late afternoon or early evening" } ], "clip_score": 13.6484375, "loss": 2.876002311706543 }, { "id": "identity_64901", "image": "ScienceQA/images/7178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the peninsula located in the northeastern part of the highlighted country?" }, { "from": "gpt", "value": "The answer is Yucatán Peninsula" } ], "clip_score": 24.890625, "loss": 1.9070912599563599 }, { "id": "identity_299224", "image": "TQA/images/question_images/human_system_respiratory_3767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number of lungs in the human body?\nChoices:\n(A) 2\n(B) 3\n(C) 1\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.21875, "loss": 3.635331630706787 }, { "id": "identity_212097", "image": "PMC-VQA/images/PMC5664263_fig1_233623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of ventricular enlargement in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.53125, "loss": 2.9778213500976562 }, { "id": "identity_17806", "image": "IconQA/images/60205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are there?\nChoices:\n(A) 4\n(B) 2\n(C) 5\n(D) 1\n(E) 3" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.03125, "loss": 3.222083330154419 }, { "id": "identity_179761", "image": "IconQA/images/50527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past 9 o'clock is it?" }, { "from": "gpt", "value": "The answer is 0 minutes" } ], "clip_score": 28.71875, "loss": 1.9925111532211304 }, { "id": "identity_179486", "image": "IconQA/images/19312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 3 ovals from the first row, how many ovals will remain in the image?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 34.28125, "loss": 2.2056241035461426 }, { "id": "identity_188655", "image": "TextVQA/images/5e9e94a96d262585.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the established series date for the Pollasky series?" }, { "from": "gpt", "value": "The answer is 05/2006" } ], "clip_score": 20.0625, "loss": 2.3788859844207764 }, { "id": "identity_305236", "image": "TextVQA/images/eccc95992042d813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this person?" }, { "from": "gpt", "value": "The answer is knecht" } ], "clip_score": 22.421875, "loss": 3.7123193740844727 }, { "id": "identity_186492", "image": "TextVQA/images/5132f0a0c25030bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many digital clocks are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.625, "loss": 4.523283958435059 }, { "id": "identity_67917", "image": "AI2D/images/175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the location labeled \"Earthquake Epicenter\"?" }, { "from": "gpt", "value": "The answer is Epicenter" } ], "clip_score": 29.8125, "loss": 2.7914140224456787 }, { "id": "identity_27876", "image": "GeoQA+/images/4250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,C,D分别为⊙O上一点,∠DOB=64°,∠D=∠B,则∠B等于()\nChoices:\n(A) 13°\n(B) 14°\n(C) 15°\n(D) 16°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.203125, "loss": 2.4631247520446777 }, { "id": "identity_254136", "image": "A-OKVQA/images/206397.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary purpose of the process related to these animals, and how does it benefit both the animals and humans?" }, { "from": "gpt", "value": "The answer is Provides wool, prevents overheating, reduces parasitic infections" } ], "clip_score": 27.890625, "loss": 3.223830223083496 }, { "id": "identity_291048", "image": "Geometry3K/images/1217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nOptions:\n(A) 7\n(B) 9\n(C) 12\n(D) 15" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.265625, "loss": 2.9351799488067627 }, { "id": "identity_36031", "image": "PMC-VQA/images/PMC4477328_f2_399070.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many views are shown for the T9 vertebra?\nChoices:\n(A) One \n(B) Two \n(C) Three \n(D) Four" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.9375, "loss": 3.638460636138916 }, { "id": "identity_198279", "image": "TextVQA/images/5065fa26c7b0eada.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.78125, "loss": 4.134072303771973 }, { "id": "identity_55264", "image": "ChartQA/images/4215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the least cited reason for not expecting more children according to the graph?" }, { "from": "gpt", "value": "The answer is No willing partner" } ], "clip_score": 30.5, "loss": 2.9280669689178467 }, { "id": "identity_62218", "image": "DocVQA/images/nnvc0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is William J. Darby" } ], "clip_score": 27.6875, "loss": 3.0271196365356445 }, { "id": "identity_310965", "image": "ChartQA/images/two_col_3896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much did the unemployment rate change from one year to the next between 2019 and 2020?" }, { "from": "gpt", "value": "The answer is 1.28" } ], "clip_score": 28.296875, "loss": 2.409428834915161 }, { "id": "identity_240270", "image": "ScienceQA/images/16091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the passage, identify two specific weather conditions mentioned and explain how they impact the operation of the wind turbines shown in the image" }, { "from": "gpt", "value": "The answer is High winds can increase energy production; storms can cause damage" } ], "clip_score": 25.359375, "loss": 3.272045612335205 }, { "id": "identity_28633", "image": "A-OKVQA/images/513602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most common horse competition?\nChoices:\n(A) riding\n(B) polo\n(C) racing\n(D) jumping" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.109375, "loss": 4.466185569763184 }, { "id": "identity_188058", "image": "A-OKVQA/images/411480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of body of water is shown at the bottom of the cliff?" }, { "from": "gpt", "value": "The answer is A lake or a fjord, based on the image context" } ], "clip_score": 29.65625, "loss": 2.6999611854553223 }, { "id": "identity_36498", "image": "TabMWP/images/6288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for 1/2 of a pound of parchment scrap?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 30.703125, "loss": 3.0788214206695557 }, { "id": "identity_195208", "image": "PMC-VQA/images/PMC5794130_F1_270365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible signs of sinusitis in this image?" }, { "from": "gpt", "value": "The answer is No visible signs of sinusitis in this image" } ], "clip_score": 27.359375, "loss": 1.9615775346755981 }, { "id": "identity_322038", "image": "VQA2.0/images/525300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why are they wearing something on their heads?" }, { "from": "gpt", "value": "The answer is because of ball" } ], "clip_score": 15.0234375, "loss": 4.318111419677734 }, { "id": "identity_273189", "image": "AI2D/images/3521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure is found at the center of the nucleus?\nChoices:\n(A) Cell membrane\n(B) Cilia\n(C) Nucleolus\n(D) Nuclear envelope" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.671875, "loss": 4.043886184692383 }, { "id": "identity_143169", "image": "DocVQA/images/sflm0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the section number for Peter H. Raven?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 13.28125, "loss": 2.956472873687744 }, { "id": "identity_324511", "image": "A-OKVQA/images/292697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this type of layout called?\nChoices:\n(A) photography\n(B) finger paint\n(C) square match\n(D) collage" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.015625, "loss": 3.9401633739471436 }, { "id": "identity_305215", "image": "IconQA/images/18849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use paper clips to measure this. It is about (_) paper clips long." }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.515625, "loss": 2.4711644649505615 }, { "id": "identity_81903", "image": "Geometry3K/images/288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the angle marked (9x + 26)°?" }, { "from": "gpt", "value": "The answer is 71.875°" } ], "clip_score": 31.78125, "loss": 1.8977130651474 }, { "id": "identity_233738", "image": "PMC-VQA/images/PMC5975718_F10_320595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined staining pattern shown in A'' for FITC-Alb, CD8b, and Iba, and how does it indicate the presence of microglial cells?" }, { "from": "gpt", "value": "The answer is Microglial cells stained with FITC-Alb, CD8b, and Iba" } ], "clip_score": 37.78125, "loss": 1.4604456424713135 }, { "id": "identity_74008", "image": "ScienceQA/images/1150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers does the sandwich bag have?" }, { "from": "gpt", "value": "The answer is One layer" } ], "clip_score": 26.84375, "loss": 4.174225330352783 }, { "id": "identity_334215", "image": "DocVQA/images/jggn0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the identifier for this page in the document?" }, { "from": "gpt", "value": "The answer is d687701" } ], "clip_score": 24.546875, "loss": 3.449634552001953 }, { "id": "identity_137478", "image": "PlotQA/images/108123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two categories represented by the bars in the graph?" }, { "from": "gpt", "value": "The answer is All education staff compensation, Current education expenditure" } ], "clip_score": 25.53125, "loss": 2.4743266105651855 }, { "id": "identity_150749", "image": "AI2D/images/3648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the blood vessel that brings oxygenated blood from the lungs to the heart?" }, { "from": "gpt", "value": "The answer is Pulmonary vein" } ], "clip_score": 29.640625, "loss": 1.6988824605941772 }, { "id": "identity_120595", "image": "Super-CLEVR/images/superCLEVR_new_001474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the largest object in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 24.515625, "loss": 3.478639602661133 }, { "id": "identity_69380", "image": "AI2D/images/872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is at the top of the food chain in this food web?" }, { "from": "gpt", "value": "The answer is hawks" } ], "clip_score": 33.09375, "loss": 2.2385177612304688 }, { "id": "identity_332134", "image": "PMC-VQA/images/PMC3618245_F11_197084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the arrows in the VA scan?\nChoices:\n(A) Blood vessels \n(B) Calcifications \n(C) Nodules \n(D) Cysts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.382221698760986 }, { "id": "identity_277738", "image": "ScienceQA/images/7364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property does this object possess?\nChoices:\n(A) slippery\n(B) sticky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.4375, "loss": 4.109824180603027 }, { "id": "identity_20554", "image": "A-OKVQA/images/187286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what reason must the cyclists stop here?\nChoices:\n(A) green light\n(B) boredom\n(C) they're tired\n(D) train crossing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.9375, "loss": 4.51201057434082 }, { "id": "identity_42868", "image": "DVQA/images/bar_train_00029864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest number of people preferring 'teeth'?" }, { "from": "gpt", "value": "The answer is bare" } ], "clip_score": 26.046875, "loss": 4.159063339233398 }, { "id": "identity_66405", "image": "AI2D/images/829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, which stage is represented by the transformation from Larva to Pupa?" }, { "from": "gpt", "value": "The answer is Pupa" } ], "clip_score": 38.8125, "loss": 1.9313515424728394 }, { "id": "identity_19285", "image": "FigureQA/images/66797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Turquoise have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.3125, "loss": 3.8623785972595215 }, { "id": "identity_237614", "image": "GeoQA+/images/657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 已知:如图,△ABC中,AB=AC,BD为∠ABC的平分线,∠BDC=75°,求∠B和∠C的度数。" }, { "from": "gpt", "value": "The answer is \\( 105^\\circ \\)" } ], "clip_score": 33.125, "loss": 2.962343215942383 }, { "id": "identity_130552", "image": "VQA2.0/images/293272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible under umbrellas in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 27.453125, "loss": 2.853815793991089 }, { "id": "identity_39744", "image": "FigureQA/images/5582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.703125, "loss": 2.459456205368042 }, { "id": "identity_329578", "image": "TQA/images/abc_question_images/tides_10145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is U?\nChoices:\n(A) MOON\n(B) OCEAN \n(C) SOLAR SYSTEM\n(D) EARTH" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.78125, "loss": 3.9716522693634033 }, { "id": "identity_274753", "image": "A-OKVQA/images/252384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which team is currently in the lead among the Mets, Visitor, Eaglebrook, and Jets?\nChoices:\n(A) mets\n(B) visitor\n(C) eaglebrook\n(D) jets" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.59375, "loss": 3.745882511138916 }, { "id": "identity_201445", "image": "PMC-VQA/images/PMC5394395_fig4_153636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to capture this image?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT) scan" } ], "clip_score": 31.0625, "loss": 1.7154496908187866 }, { "id": "identity_99404", "image": "TabMWP/images/36412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the weight lifting results?" }, { "from": "gpt", "value": "The answer is 62" } ], "clip_score": 28.625, "loss": 3.455265522003174 }, { "id": "identity_272531", "image": "VizWiz/images/VizWiz_train_00004404.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is happening? Hello?" }, { "from": "gpt", "value": "The answer is bull dog" } ], "clip_score": 25.421875, "loss": 3.061133623123169 }, { "id": "identity_195125", "image": "PMC-VQA/images/PMC6020204_Fig2_333684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the abnormality appear to be affecting surrounding brain tissue?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.953125, "loss": 2.59838604927063 }, { "id": "identity_132494", "image": "A-OKVQA/images/100516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tennis player's shirt?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.5625, "loss": 3.2341854572296143 }, { "id": "identity_47894", "image": "MapQA/images/map_11636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range for the state of New York?" }, { "from": "gpt", "value": "The answer is 25.8%-39.1%" } ], "clip_score": 28.484375, "loss": 1.7696444988250732 }, { "id": "identity_72867", "image": "ScienceQA/images/11945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many producer species are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.296875, "loss": 4.544915676116943 }, { "id": "identity_30286", "image": "PMC-VQA/images/PMC5615764_Fig1_220556.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to visualize the difference in density between the liver and spleen? \nChoices:\n(A) X-ray \n(B) MRI \n(C) CT scan \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.796875, "loss": 3.788733959197998 }, { "id": "identity_101772", "image": "TabMWP/images/15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Jayla wants to buy an equal number of pasta with meat sauce and pasta with mushrooms for a party, spending no more than $100. What is the maximum number of each pasta dish she can buy?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.671875, "loss": 3.018103837966919 }, { "id": "identity_26087", "image": "DocVQA/images/rtgj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the age group mentioned in the page?" }, { "from": "gpt", "value": "The answer is Between 1951 and 1971" } ], "clip_score": 16.84375, "loss": 2.4794743061065674 }, { "id": "identity_175050", "image": "IconQA/images/26620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this triangle have a right angle?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.1875, "loss": 3.144239902496338 }, { "id": "identity_63946", "image": "ScienceQA/images/2441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 25.1875, "loss": 2.766831636428833 }, { "id": "identity_178156", "image": "TabMWP/images/27126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many parks have more than one basketball hoop?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.96875, "loss": 2.19431471824646 }, { "id": "identity_24902", "image": "A-OKVQA/images/69220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the man holding?\nChoices:\n(A) chicken ring\n(B) cinnamon roll\n(C) zeppole\n(D) calamari" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.203125, "loss": 4.054839134216309 }, { "id": "identity_149132", "image": "FigureQA/images/21593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the four colors represented on the y-axis?" }, { "from": "gpt", "value": "The answer is 22.5" } ], "clip_score": 34.3125, "loss": 1.649462342262268 }, { "id": "identity_306909", "image": "AI2D/images/2362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do adult rhino beetles lay?\nChoices:\n(A) eggs\n(B) nymph\n(C) larva\n(D) pupa" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.578125, "loss": 3.944535732269287 }, { "id": "identity_27624", "image": "TextVQA/images/f8e969264fc43aa4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what jersey number is seen on the batter?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 22.84375, "loss": 2.56567645072937 }, { "id": "identity_299426", "image": "DocVQA/images/jqxf0227_24.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the subject?" }, { "from": "gpt", "value": "The answer is communicable disease control" } ], "clip_score": 21.09375, "loss": 3.588033676147461 }, { "id": "identity_277669", "image": "GeoQA+/images/4212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, lines AB and CD intersect at point O. If OE is perpendicular to AB and angle DOE is 58°, what is the measure of angle AOC?\nChoices:\n(A) 32°\n(B) 42°\n(C) 48°\n(D) 58°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.96875, "loss": 2.225998878479004 }, { "id": "identity_51501", "image": "DVQA/images/bar_train_00054441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had no sales in the 'weight' store?" }, { "from": "gpt", "value": "The answer is none" } ], "clip_score": 28.953125, "loss": 3.399085283279419 }, { "id": "identity_287682", "image": "ScienceQA/images/2161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is an antler classified as a mineral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.03125, "loss": 3.997307300567627 }, { "id": "identity_328029", "image": "A-OKVQA/images/473590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What photography related issue can be noticed in this image?\nChoices:\n(A) focus\n(B) noise\n(C) motion blur\n(D) exposure" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.578125, "loss": 3.9536986351013184 }, { "id": "identity_198047", "image": "PMC-VQA/images/PMC6500019_Fig1_475204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lesions bilateral?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.890625, "loss": 3.385981798171997 }, { "id": "identity_181804", "image": "CLEVR-Math/images/CLEVR_train_008301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cubes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.546875, "loss": 2.5687882900238037 }, { "id": "identity_299587", "image": "PMC-VQA/images/PMC7830087_diagnostics-11-00129-f001_351373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of scan was performed on the patient in the image: CT scan, MRI scan, PET/CT scan, or X-ray scan?\nChoices:\n(A) CT scan \n(B) MRI scan \n(C) PET/CT scan \n(D) X-ray scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.84375, "loss": 3.558290481567383 }, { "id": "identity_216191", "image": "PMC-VQA/images/PMC7348129_Fig4_209432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of pulmonary embolism in the image?" }, { "from": "gpt", "value": "The answer is No signs of pulmonary embolism" } ], "clip_score": 27.859375, "loss": 2.27303409576416 }, { "id": "identity_7354", "image": "Geometry3K/images/378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. Assume that segments that appear to be tangent are tangent.\nChoices:\n(A) 14\n(B) 21\n(C) 28\n(D) 35" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.09375, "loss": 2.8474464416503906 }, { "id": "identity_88961", "image": "GeoQA+/images/1506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is parallel to CD and angle 1 measures 52°, what is the measure of angle GEF?" }, { "from": "gpt", "value": "The answer is 52°" } ], "clip_score": 35.1875, "loss": 1.4404354095458984 }, { "id": "identity_297959", "image": "ScienceQA/images/11711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the given states, which state is located the farthest west? (A) Wyoming, (B) Iowa, (C) Georgia, or (D) Kansas." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.125, "loss": 3.6834514141082764 }, { "id": "identity_68099", "image": "ScienceQA/images/16136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items in the image are fruits?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.375, "loss": 2.35852313041687 }, { "id": "identity_169366", "image": "IconQA/images/98765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of celestial objects in the sequence?" }, { "from": "gpt", "value": "The answer is There are 7 celestial objects" } ], "clip_score": 20.234375, "loss": 2.260833978652954 }, { "id": "identity_59932", "image": "DVQA/images/bar_train_00040556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have a 'seat' value of 10?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.171875, "loss": 2.476118326187134 }, { "id": "identity_130683", "image": "TextVQA/images/434ba5621a4b9d61.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the conductor of the ensemble for this album?" }, { "from": "gpt", "value": "The answer is Robert Hollingsworth" } ], "clip_score": 24.546875, "loss": 2.2728567123413086 }, { "id": "identity_154422", "image": "AI2D/images/4384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bird beak type is best adapted for eating fish caught during flight?" }, { "from": "gpt", "value": "The answer is Aerial fishing" } ], "clip_score": 30.9375, "loss": 1.9919257164001465 }, { "id": "identity_190974", "image": "TextVQA/images/460b1dcaf38fdf9f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the collectible item advertised on the billboard?" }, { "from": "gpt", "value": "The answer is $399.99" } ], "clip_score": 17.859375, "loss": 1.8450908660888672 }, { "id": "identity_271325", "image": "ScienceQA/images/16215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the following choices, which animal is adapted to blend in with dead leaves for camouflage?\n(A) Arctic wolf\n(B) plated leaf chameleon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.59375, "loss": 3.9254519939422607 }, { "id": "identity_334140", "image": "A-OKVQA/images/119761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is this?\nChoices:\n(A) soccer\n(B) baseball\n(C) equestrian\n(D) basketball" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0546875, "loss": 4.299592971801758 }, { "id": "identity_135535", "image": "MapQA/images/map_2384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the percentage range of 74.4%-76.8%?" }, { "from": "gpt", "value": "The answer is Multiple states (cannot give a single answer)" } ], "clip_score": 26.546875, "loss": 3.047548294067383 }, { "id": "identity_254073", "image": "GeoQA+/images/1717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线l1∥l2,∠MAB=125°,∠NBA=85°,若∠1的度数是∠2的2倍,则∠1的度数是多少?" }, { "from": "gpt", "value": "The answer is 20°" } ], "clip_score": 31.5, "loss": 2.5594451427459717 }, { "id": "identity_203457", "image": "PMC-VQA/images/PMC6215452_fig-14_391041.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the femur is primarily shown in the image?" }, { "from": "gpt", "value": "The answer is Proximal end" } ], "clip_score": 28.171875, "loss": 3.295806407928467 }, { "id": "identity_77913", "image": "TQA/images/abc_question_images/human_system_excretory_16117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled 'M'?" }, { "from": "gpt", "value": "The answer is To store urine" } ], "clip_score": 26.625, "loss": 3.476771593093872 }, { "id": "identity_12689", "image": "TextVQA/images/bb78de8275b56848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: whos light shines upon you?" }, { "from": "gpt", "value": "The answer is god's" } ], "clip_score": 21.140625, "loss": 3.174190044403076 }, { "id": "identity_301154", "image": "VQA-RAD/images/synpic32012.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What vascular issue is depicted here?" }, { "from": "gpt", "value": "The answer is portal vein occlusion" } ], "clip_score": 24.71875, "loss": 2.5394127368927 }, { "id": "identity_338119", "image": "TextVQA/images/ac71817a98529f20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest number visible on this?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 13.7109375, "loss": 2.1628451347351074 }, { "id": "identity_253648", "image": "ScienceQA/images/11288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the top magnet is rotated 180 degrees around its horizontal axis, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is attract" } ], "clip_score": 27.921875, "loss": 3.1348648071289062 }, { "id": "identity_208042", "image": "A-OKVQA/images/264599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is There are at least 12 people visible" } ], "clip_score": 17.796875, "loss": 2.3936827182769775 }, { "id": "identity_198859", "image": "VQA2.0/images/499193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pedestrians visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.5, "loss": 2.9916739463806152 }, { "id": "identity_283326", "image": "PMC-VQA/images/PMC4770723_F5_6788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During the FISH analysis, how many images were captured?\nChoices:\n(A) Three\n(B) Two\n(C) Four\n(D) One" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.015625, "loss": 3.1537959575653076 }, { "id": "identity_323366", "image": "ScienceQA/images/14710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared characteristic of these items?\nChoices:\n(A) bouncy\n(B) opaque\n(C) translucent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 5.984375, "loss": 4.517488956451416 }, { "id": "identity_28946", "image": "GeoQA+/images/4310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A、B、C是⊙O上的点,若∠AOB=60°,则∠ACB的度数为()\nChoices:\n(A) 60°\n(B) 50°\n(C) 40°\n(D) 30°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.53125, "loss": 2.967089891433716 }, { "id": "identity_115136", "image": "A-OKVQA/images/40846.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the stage light shining on the man?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.859375, "loss": 2.95723557472229 }, { "id": "identity_337836", "image": "PMC-VQA/images/PMC5542187_F3_199626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscopy was used to obtain these images?\nChoices:\n(A) TEM \n(B) SEM \n(C) Confocal \n(D) Brightfield" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.234375, "loss": 4.097343444824219 }, { "id": "identity_234688", "image": "UniGeo/images/3070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,将△ABC绕点A逆时针旋转一定角度,得到△ADE,此时点C恰好在线段DE上,若∠B=40°,∠CAE=60°,则∠CDE的度数为多少?" }, { "from": "gpt", "value": "The answer is 140°" } ], "clip_score": 24.8125, "loss": 1.676827073097229 }, { "id": "identity_101754", "image": "IconQA/images/37925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are both orange and circles?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.53125, "loss": 2.3236618041992188 }, { "id": "identity_201613", "image": "PMC-VQA/images/PMC7353682_Fig1_211897.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging is shown in the image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 30.828125, "loss": 2.8599464893341064 }, { "id": "identity_229556", "image": "GeoQA+/images/1582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在菱形ABCD中,AC=8,BD=6,求菱形ABCD的面积。" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 28.453125, "loss": 2.37568736076355 }, { "id": "identity_192426", "image": "PMC-VQA/images/PMC6957570_Fig8_99259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any cellular abnormalities be observed in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.734375, "loss": 3.167959451675415 }, { "id": "identity_93690", "image": "IconQA/images/33257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fish are facing left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.828125, "loss": 2.488654851913452 }, { "id": "identity_280835", "image": "ScienceQA/images/19947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive interaction with each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.671875, "loss": 3.5300850868225098 }, { "id": "identity_76886", "image": "ScienceQA/images/5061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is Two magnets" } ], "clip_score": 20.921875, "loss": 3.592942953109741 }, { "id": "identity_260245", "image": "A-OKVQA/images/324082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the advantages of using walkie talkies over cellphones for communication in this scenario?" }, { "from": "gpt", "value": "The answer is Instant communication, reliability, group coordination, no network dependency" } ], "clip_score": 16.765625, "loss": 3.3744351863861084 }, { "id": "identity_220314", "image": "TextVQA/images/2187282c2dcc218d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pedestrians visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.90625, "loss": 3.1818156242370605 }, { "id": "identity_245083", "image": "TQA/images/question_images/cycle_water_6000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify and describe the processes involved in the cycle of matter depicted in the image, and explain how each process contributes to the overall cycle" }, { "from": "gpt", "value": "The answer is The processes involved are evaporation, condensation, precipitation, surface runoff, infiltration, groundwater flow, saltwater intrusion, cloud formation, snow, lakes, and impervious layer" } ], "clip_score": 32.46875, "loss": 2.1734559535980225 }, { "id": "identity_115701", "image": "PMC-VQA/images/PMC4711110_Fig1_461730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a fracture in the pelvic bones?" }, { "from": "gpt", "value": "The answer is No evidence of fracture" } ], "clip_score": 27.796875, "loss": 3.0860273838043213 }, { "id": "identity_331548", "image": "A-OKVQA/images/537139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might cause the property values of the houses to stay low in this area?\nChoices:\n(A) wind\n(B) weather\n(C) jobs\n(D) train noise" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 12.4765625, "loss": 4.382790565490723 }, { "id": "identity_141461", "image": "MapQA/images/map_6966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of advisers in the West Coast states (California, Oregon, Washington)?" }, { "from": "gpt", "value": "The answer is A float number" } ], "clip_score": 36.3125, "loss": 4.952928066253662 }, { "id": "identity_70721", "image": "ScienceQA/images/1718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary habitat of the organism shown in the image?" }, { "from": "gpt", "value": "The answer is Wetlands or marshes" } ], "clip_score": 20.828125, "loss": 2.690016746520996 }, { "id": "identity_294928", "image": "IconQA/images/4870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangles present?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 25.34375, "loss": 1.9015239477157593 }, { "id": "identity_22126", "image": "AI2D/images/628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does stage G depict?\nChoices:\n(A) flower\n(B) seed\n(C) fruit\n(D) death" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.015625, "loss": 3.91576886177063 }, { "id": "identity_120495", "image": "A-OKVQA/images/327514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of appliance is on the countertop to the right of the stove?" }, { "from": "gpt", "value": "The answer is A slow cooker" } ], "clip_score": 19.171875, "loss": 3.9927918910980225 }, { "id": "identity_112279", "image": "Super-CLEVR/images/superCLEVR_new_001890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tiny vehicle located to the left of the yellow shiny car?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 33.9375, "loss": 3.89399790763855 }, { "id": "identity_175870", "image": "IconQA/images/44102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the line is covered by the three paper clips?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 26.65625, "loss": 1.541330099105835 }, { "id": "identity_302654", "image": "FigureQA/images/1725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Coral the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.125, "loss": 3.8583953380584717 }, { "id": "identity_313062", "image": "FigureQA/images/36621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.0, "loss": 3.92204213142395 }, { "id": "identity_114658", "image": "VQA-RAD/images/synpic57935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of hydrocephalus in this image?" }, { "from": "gpt", "value": "The answer is Yes, the enlarged ventricles suggest hydrocephalus" } ], "clip_score": 31.609375, "loss": 1.759636402130127 }, { "id": "identity_155822", "image": "ScienceQA/images/4314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance between the two magnets in Pair 1?" }, { "from": "gpt", "value": "The answer is 4.65 cm" } ], "clip_score": 23.25, "loss": 2.2201600074768066 }, { "id": "identity_242376", "image": "VizWiz/images/VizWiz_train_00002476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three distinct features of the logo and explain how they contribute to its overall design" }, { "from": "gpt", "value": "The answer is Symmetrical star, bright yellow, black outline" } ], "clip_score": 32.625, "loss": 3.921314239501953 }, { "id": "identity_121876", "image": "PMC-VQA/images/PMC5348553_Fig6_139017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure indicated by the arrowhead?" }, { "from": "gpt", "value": "The answer is Synaptic transmission" } ], "clip_score": 21.390625, "loss": 3.905508041381836 }, { "id": "identity_122826", "image": "PMC-VQA/images/PMC3281964_pone-0031618-g001_126322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color variations can be observed on the ant's body?" }, { "from": "gpt", "value": "The answer is The ant has a brown head, dark brown thorax, and lighter brown abdomen" } ], "clip_score": 32.65625, "loss": 2.052410840988159 }, { "id": "identity_53352", "image": "ChartQA/images/two_col_5839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the increase in the projected number of people with dementia from 2015 to 2050?" }, { "from": "gpt", "value": "The answer is 196 thousand" } ], "clip_score": 25.59375, "loss": 2.969513416290283 }, { "id": "identity_112188", "image": "VQA-RAD/images/synpic33331.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the heart appear enlarged in this image?" }, { "from": "gpt", "value": "The answer is No, the heart does not appear enlarged" } ], "clip_score": 25.109375, "loss": 1.6958454847335815 }, { "id": "identity_243613", "image": "TabMWP/images/9473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Perry has $0.05. If he buys a mechanical pencil and a package of crayons, how much more money does he need to buy a bright yellow highlighter as well?" }, { "from": "gpt", "value": "The answer is $0.08" } ], "clip_score": 34.75, "loss": 1.940763235092163 }, { "id": "identity_44795", "image": "FigureQA/images/84449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are there for the color Hot Pink?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 23.5, "loss": 2.228079080581665 }, { "id": "identity_89123", "image": "GeoQA+/images/2673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the cone in the image is filled with water up to half its height, what is the volume of water in the cone?" }, { "from": "gpt", "value": "The answer is 113.51 cm^3" } ], "clip_score": 34.28125, "loss": 2.2849109172821045 }, { "id": "identity_38633", "image": "IconQA/images/25821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Myra is getting dressed in the morning. The clock in her room shows the time. What time is it?\nChoices:\n(A) 6:30 A.M.\n(B) 6:30 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.875, "loss": 3.4865517616271973 }, { "id": "identity_182958", "image": "PMC-VQA/images/PMC6060136_Fig2_345899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible dental fillings or restorations in this image?" }, { "from": "gpt", "value": "The answer is No visible dental fillings or restorations" } ], "clip_score": 26.8125, "loss": 2.0619144439697266 }, { "id": "identity_181871", "image": "TabMWP/images/6487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of characters across all books?" }, { "from": "gpt", "value": "The answer is 708" } ], "clip_score": 36.375, "loss": 2.492820978164673 }, { "id": "identity_139285", "image": "FigureQA/images/13208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in length between the \"Web Maroon\" and \"Mint\" bars?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 32.0, "loss": 2.271207332611084 }, { "id": "identity_109294", "image": "Super-CLEVR/images/superCLEVR_new_001692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the purple airplane?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 28.140625, "loss": 4.257900238037109 }, { "id": "identity_6695", "image": "CLEVR-Math/images/CLEVR_train_023781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 2 large green shiny balls. How many large green shiny balls are left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.46875, "loss": 2.4063313007354736 }, { "id": "identity_121737", "image": "TextVQA/images/b334f9def34c89a8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the laptop have a visible brand logo on its exterior?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.3125, "loss": 3.135061740875244 }, { "id": "identity_309399", "image": "Geometry3K/images/1188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: P is the incenter of \\(\\angle XYZ\\). Determine the length of PK.\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) 20" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.875, "loss": 2.7888004779815674 }, { "id": "identity_97961", "image": "TabMWP/images/20923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more frogs are there in 3 ponds compared to 1 pond?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.625, "loss": 2.273704767227173 }, { "id": "identity_22647", "image": "A-OKVQA/images/97667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leavening was used most here?\nChoices:\n(A) none\n(B) salt\n(C) yeast\n(D) vegemite" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.6328125, "loss": 4.4695281982421875 }, { "id": "identity_60266", "image": "DocVQA/images/ysbw0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the presentation titled \"MNAR modulator of non-genomic Activity of the estrogen receptor\"?" }, { "from": "gpt", "value": "The answer is Cheskis" } ], "clip_score": 16.734375, "loss": 4.361692905426025 }, { "id": "identity_142495", "image": "ChartQA/images/16309028006002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the prevalence percentage of Anxiety disorders?" }, { "from": "gpt", "value": "The answer is 3.14%" } ], "clip_score": 23.546875, "loss": 1.996360421180725 }, { "id": "identity_49833", "image": "FigureQA/images/55279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pie chart represents a total of 400 units, how many units does the Dark Red color represent?" }, { "from": "gpt", "value": "The answer is 300" } ], "clip_score": 40.28125, "loss": 1.8088746070861816 }, { "id": "identity_269300", "image": "PMC-VQA/images/PMC3755418_fig1_227730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which quadrant of the left breast is the lesion located?\nChoices:\n(A) Upper outer quadrant\n(B) Lower middle quadrant\n(C) Upper middle quadrant\n(D) Lower outer quadrant" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.921875, "loss": 4.404902458190918 }, { "id": "identity_81698", "image": "Geometry3K/images/1228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle PTR?" }, { "from": "gpt", "value": "The answer is 4sqrt(3)" } ], "clip_score": 34.5625, "loss": 1.948995590209961 }, { "id": "identity_335315", "image": "ScienceQA/images/9665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the sentence.\nThe mutation in the () affected the structure and function of the ().\nChoices:\n(A) a protein . . . a gene\n(B) a gene . . . a protein" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.296875, "loss": 4.028868675231934 }, { "id": "identity_116396", "image": "Super-CLEVR/images/superCLEVR_new_001855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the yellow object in the image?" }, { "from": "gpt", "value": "The answer is A plane" } ], "clip_score": 20.875, "loss": 4.4212870597839355 }, { "id": "identity_131496", "image": "PMC-VQA/images/PMC3409063_F6_147536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible signs of inflammation or infection?" }, { "from": "gpt", "value": "The answer is No signs of inflammation or infection" } ], "clip_score": 23.203125, "loss": 2.233046531677246 }, { "id": "identity_3214", "image": "PMC-VQA/images/PMC9005803_F1_252228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrow represent in the image? \nChoices:\n(A) The normal ventricle \n(B) Brain atrophy \n(C) Enlarged ventricle in acute stage \n(D) Changes after cocktail therapy. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.375, "loss": 4.1551642417907715 }, { "id": "identity_55348", "image": "FigureQA/images/4507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Medium Blue section in the pie chart?" }, { "from": "gpt", "value": "The answer is 25%" } ], "clip_score": 33.625, "loss": 1.5315991640090942 }, { "id": "identity_283985", "image": "IconQA/images/6998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of rectangles present?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.953125, "loss": 1.757700800895691 }, { "id": "identity_38034", "image": "A-OKVQA/images/31235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the weather faced by the woman?\nChoices:\n(A) foggy\n(B) sunny\n(C) cold\n(D) stormy" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.890625, "loss": 3.9393951892852783 }, { "id": "identity_172378", "image": "TabMWP/images/13483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more train tickets were sold on Wednesday compared to Monday?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 38.3125, "loss": 2.163264036178589 }, { "id": "identity_15666", "image": "UniGeo/images/2488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 已知,如图,圆O的弦AB=AD,∠BOD=124°,点C在劣弧⁀{AB}上,则∠DCA的度数为()\nChoices:\n(A) 59°\n(B) 62°\n(C) 56°\n(D) 42°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.4375, "loss": 2.4541587829589844 }, { "id": "identity_227118", "image": "DocVQA/images/yxbv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date, time, and location of the Union League Club Honor Roll of Big Apple Boosters event?" }, { "from": "gpt", "value": "The answer is Tuesday, November 28, 1978, 5:30 PM, Lincoln Hall" } ], "clip_score": 37.15625, "loss": 0.7860049605369568 }, { "id": "identity_75283", "image": "TQA/images/abc_question_images/human_system_respiratory_11326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter corresponds to the organ responsible for voice production?" }, { "from": "gpt", "value": "The answer is S" } ], "clip_score": 28.546875, "loss": 2.7324271202087402 }, { "id": "identity_215196", "image": "A-OKVQA/images/457011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person in the middle holding in their hands?" }, { "from": "gpt", "value": "The answer is Ski poles" } ], "clip_score": 20.0, "loss": 2.6583900451660156 }, { "id": "identity_18402", "image": "GEOS/images/044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, AD = 4, AB = 3 and CD = 9. What is the area of triangle AEC?\nChoices:\n(A) 18\n(B) 13.5\n(C) 9\n(D) 4.5\n(E) 3" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.796875, "loss": 2.6423068046569824 }, { "id": "identity_96135", "image": "IconQA/images/62154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the nail is driven into a piece of wood so that only the head is visible, how much of the nail's length is inside the wood?" }, { "from": "gpt", "value": "The answer is 2.25 inches" } ], "clip_score": 34.3125, "loss": 1.6145837306976318 }, { "id": "identity_166316", "image": "IconQA/images/88724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares wide is the letter J?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.203125, "loss": 2.1859054565429688 }, { "id": "identity_253384", "image": "DocVQA/images/jtgj0223_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between Total assets and Total liabilities, and how does this compare to the Total income minus Total expenses?" }, { "from": "gpt", "value": "The answer is 0 and 52.2" } ], "clip_score": 19.984375, "loss": 3.5104587078094482 }, { "id": "identity_240799", "image": "PMC-VQA/images/PMC5135722_Fig1_101981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate volume of the intracerebral hematoma in the right hemisphere if the hematoma has an ellipsoid shape with the following dimensions: 4 cm in length, 3 cm in width, and 2 cm in height?" }, { "from": "gpt", "value": "The answer is approximately 12.56 cm³" } ], "clip_score": 28.390625, "loss": 2.090390682220459 }, { "id": "identity_77977", "image": "AI2D/images/3452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size range of an adult bed bug as indicated in the image?" }, { "from": "gpt", "value": "The answer is 3/16 - 1/4\" long, half as wide" } ], "clip_score": 33.5625, "loss": 1.58882737159729 }, { "id": "identity_47109", "image": "DVQA/images/bar_train_00092685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for \"match\"?" }, { "from": "gpt", "value": "The answer is spread" } ], "clip_score": 21.65625, "loss": 4.9034342765808105 }, { "id": "identity_90032", "image": "Geometry3K/images/1673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of x in the given diagram" }, { "from": "gpt", "value": "The answer is 1.5" } ], "clip_score": 31.59375, "loss": 1.7653297185897827 }, { "id": "identity_215568", "image": "TextVQA/images/fbcc3a12ba8e8505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event is being advertised on the banners?" }, { "from": "gpt", "value": "The answer is Vancouver 2010 Olympic and Paralympic Winter Games" } ], "clip_score": 31.546875, "loss": 1.0775161981582642 }, { "id": "identity_122125", "image": "A-OKVQA/images/575465.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the team logo on the jersey of the player at first base?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 24.5, "loss": 3.705507278442383 }, { "id": "identity_278169", "image": "VQA2.0/images/153031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trees are the tall ones in the background?" }, { "from": "gpt", "value": "The answer is palms" } ], "clip_score": 22.265625, "loss": 4.350247859954834 }, { "id": "identity_312142", "image": "A-OKVQA/images/484386.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are there tires on the side of the boat?\nChoices:\n(A) helping float\n(B) docking against\n(C) flat repair\n(D) good luck" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.90625, "loss": 4.24207878112793 }, { "id": "identity_58852", "image": "FigureQA/images/24092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 34.53125, "loss": 3.6042275428771973 }, { "id": "identity_311837", "image": "FigureQA/images/2004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the largest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.96875, "loss": 4.830996036529541 }, { "id": "identity_58217", "image": "PlotQA/images/86097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the percentage of HIV infected female population in Zambia show an increasing or decreasing trend?" }, { "from": "gpt", "value": "The answer is Decreasing trend" } ], "clip_score": 26.6875, "loss": 2.27923321723938 }, { "id": "identity_33365", "image": "TabMWP/images/36351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Alexa has $0.12. Does she have enough to buy a yellow tiger's eye and a piece of red sandstone?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.0, "loss": 3.778733730316162 }, { "id": "identity_315376", "image": "CLEVR-Math/images/CLEVR_train_053266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all large rubber items. Remove all metal things. How many items remain?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.265625, "loss": 2.4642398357391357 }, { "id": "identity_248684", "image": "ScienceQA/images/2161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary composition of an antler, and how does it differ from a mineral in terms of its biological origin and material properties?" }, { "from": "gpt", "value": "The answer is Antlers are organic and composed of bone, unlike minerals" } ], "clip_score": 33.28125, "loss": 3.070955276489258 }, { "id": "identity_132414", "image": "A-OKVQA/images/28115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any cars parked on the side of the street?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.96875, "loss": 3.1243271827697754 }, { "id": "identity_297921", "image": "VQA2.0/images/351262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the police officer on the motorcycle waving at someone?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 29.859375, "loss": 3.267702102661133 }, { "id": "identity_218060", "image": "VQA2.0/images/429227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Sunny" } ], "clip_score": 10.609375, "loss": 2.436802864074707 }, { "id": "identity_227468", "image": "Geometry3K/images/121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the perimeter of triangle \\( WXY \\)" }, { "from": "gpt", "value": "The answer is 27" } ], "clip_score": 34.625, "loss": 3.3959546089172363 }, { "id": "identity_197641", "image": "TextVQA/images/605191b3e49690ec.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing yellow uniforms in the image?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 20.078125, "loss": 1.919097661972046 }, { "id": "identity_109008", "image": "TextVQA/images/c871f08c3181d558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is displayed on the counter in the image?" }, { "from": "gpt", "value": "The answer is 0:38" } ], "clip_score": 16.96875, "loss": 2.0499372482299805 }, { "id": "identity_237626", "image": "GeoQA+/images/5485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,BC是⊙O的直径,点D在⊙O上,AB是⊙O的切线,B为切点,连接CD并延长交AB于点A,若∠BOD=100°,则∠CAD的度数是多少?" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 25.0, "loss": 1.8756183385849 }, { "id": "identity_336683", "image": "PlotQA/images/30133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many labels are there in the legend?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.609375, "loss": 2.7146995067596436 }, { "id": "identity_86437", "image": "Geometry3K/images/885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the side opposite to the 60° angle to the side adjacent to the 60° angle?" }, { "from": "gpt", "value": "The answer is √3/2 or 0.866 (rounded to three decimal places)" } ], "clip_score": 31.296875, "loss": 1.4707880020141602 }, { "id": "identity_73260", "image": "ScienceQA/images/16308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item on Judith's list is a type of nut?" }, { "from": "gpt", "value": "The answer is almonds" } ], "clip_score": 27.140625, "loss": 2.07576322555542 }, { "id": "identity_38561", "image": "A-OKVQA/images/557174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which kind of animal lives in this house?\nChoices:\n(A) reptile\n(B) fish\n(C) cat\n(D) dog" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 8.28125, "loss": 3.5381157398223877 }, { "id": "identity_112097", "image": "Super-CLEVR/images/superCLEVR_new_001946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the truck in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 24.703125, "loss": 2.676164388656616 }, { "id": "identity_302420", "image": "ChartQA/images/7598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest value in the chart?" }, { "from": "gpt", "value": "The answer is 28" } ], "clip_score": 11.5703125, "loss": 2.742682933807373 }, { "id": "identity_65529", "image": "AI2D/images/247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the snake play in this food web?" }, { "from": "gpt", "value": "The answer is Secondary and Tertiary Consumer" } ], "clip_score": 37.375, "loss": 2.235151529312134 }, { "id": "identity_175268", "image": "TabMWP/images/35573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a randomly selected injured skater was age 8?" }, { "from": "gpt", "value": "The answer is 2/5 or 0.4" } ], "clip_score": 26.4375, "loss": 1.2973016500473022 }, { "id": "identity_154142", "image": "AI2D/images/2141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are there in the shark's life cycle according to the diagram?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.28125, "loss": 2.3054940700531006 }, { "id": "identity_118589", "image": "A-OKVQA/images/44504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the chair visible in the image?" }, { "from": "gpt", "value": "The answer is White with green stripes" } ], "clip_score": 23.96875, "loss": 3.2325823307037354 }, { "id": "identity_160296", "image": "GeoQA+/images/5621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle C is equal to angle E, what is the measure of angle COE?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 33.21875, "loss": 1.7599855661392212 }, { "id": "identity_166674", "image": "TabMWP/images/14464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total number of employees with health care benefits from 2006 to 2010?" }, { "from": "gpt", "value": "The answer is 157" } ], "clip_score": 37.40625, "loss": 2.3614583015441895 }, { "id": "identity_67440", "image": "ScienceQA/images/13519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of environment is the scientist working in, as suggested by the image?" }, { "from": "gpt", "value": "The answer is Controlled, sterile laboratory environment" } ], "clip_score": 28.46875, "loss": 2.620305299758911 }, { "id": "identity_307513", "image": "PlotQA/images/125015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars are there on the first tick from the left?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.828125, "loss": 2.510411500930786 }, { "id": "identity_118361", "image": "TextVQA/images/3f4a8ac091ec3868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the IBM/Tandy compatible version of the game?" }, { "from": "gpt", "value": "The answer is $49.95" } ], "clip_score": 19.09375, "loss": 1.404219388961792 }, { "id": "identity_127591", "image": "VQA-AS/images/15762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the person wearing?" }, { "from": "gpt", "value": "The answer is Boots" } ], "clip_score": 23.796875, "loss": 3.4431545734405518 }, { "id": "identity_256341", "image": "A-OKVQA/images/12020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three essential pieces of protective gear he should wear while skateboarding to ensure maximum safety?" }, { "from": "gpt", "value": "The answer is Helmet, wrist guards, knee pads" } ], "clip_score": 18.578125, "loss": 1.4832756519317627 }, { "id": "identity_305245", "image": "Super-CLEVR/images/superCLEVR_new_001227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the matte object that shares its color with the small shiny tandem bike?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 28.90625, "loss": 4.051851272583008 }, { "id": "identity_307491", "image": "GeoQA+/images/3973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,要修建一条公路,从某村沿北偏东75°方向到另一个村,从这个村沿北偏西25°方向到第三个村.若要保持公路CE与从第一个村到第二个村的方向一致,则应顺时针转动的度数为()\n\n选择:\n(A) 50°\n(B) 75°\n(C) 100°\n(D) 105°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.125, "loss": 2.6905155181884766 }, { "id": "identity_115678", "image": "A-OKVQA/images/92894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of equipment is the catcher wearing?" }, { "from": "gpt", "value": "The answer is Catcher's gear" } ], "clip_score": 25.109375, "loss": 1.8268382549285889 }, { "id": "identity_1934", "image": "VQA2.0/images/77100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 11.4375, "loss": 2.339465856552124 }, { "id": "identity_115290", "image": "VizWiz/images/VizWiz_train_00004672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the case have any visible branding or logos?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 13.6796875, "loss": 2.8222317695617676 }, { "id": "identity_4547", "image": "DocVQA/images/xkmd0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of honorarium was offered?" }, { "from": "gpt", "value": "The answer is Meals" } ], "clip_score": 21.703125, "loss": 3.8335368633270264 }, { "id": "identity_313609", "image": "DVQA/images/bar_train_00088883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest total units sold across all locations?" }, { "from": "gpt", "value": "The answer is career" } ], "clip_score": 29.46875, "loss": 4.322079658508301 }, { "id": "identity_121472", "image": "VQA-RAD/images/synpic24424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see the kidneys in this image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.875, "loss": 2.951066493988037 }, { "id": "identity_126584", "image": "PMC-VQA/images/PMC7366709_Fig3_215539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the optic disc be identified in this image?" }, { "from": "gpt", "value": "The answer is Yes, it is the bright circular area where vessels converge" } ], "clip_score": 29.25, "loss": 3.311108112335205 }, { "id": "identity_154187", "image": "TQA/images/abc_question_images/cycle_water_14915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the process of precipitation?" }, { "from": "gpt", "value": "The answer is Label V" } ], "clip_score": 31.3125, "loss": 3.9656331539154053 }, { "id": "identity_326111", "image": "PMC-VQA/images/PMC6198224_F0004_384919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red structure seen here represent? \nChoices:\n(A) Lateral semicircular canal \n(B) Cochlear nerve \n(C) Tractus spiralis arteriosus \n(D) Facial nerve" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.703125, "loss": 3.9100425243377686 }, { "id": "identity_156393", "image": "ScienceQA/images/607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pink particles are removed from both solutions, what would be the remaining volume of the solvent in each?" }, { "from": "gpt", "value": "The answer is 33 mL for Solution A and 35 mL for Solution B" } ], "clip_score": 40.75, "loss": 1.5706065893173218 }, { "id": "identity_228069", "image": "IconQA/images/53548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you place the nail at the 1-inch mark on the ruler, what will be the position of the tip of the nail on the ruler?" }, { "from": "gpt", "value": "The answer is 4 inches" } ], "clip_score": 36.1875, "loss": 2.8913955688476562 }, { "id": "identity_273646", "image": "DVQA/images/bar_train_00024267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the values in the chart follow a percentage scale?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.25, "loss": 3.4944190979003906 }, { "id": "identity_276576", "image": "Super-CLEVR/images/superCLEVR_new_000337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more big yellow shiny airliners or small green objects?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.3125, "loss": 2.929266929626465 }, { "id": "identity_335383", "image": "ScienceQA/images/11082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal's mouth is also adapted to consume plant matter?\nChoices:\n(A) goat\n(B) Nile crocodile" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.796875, "loss": 3.901404619216919 }, { "id": "identity_241897", "image": "VQA2.0/images/204420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the background, and what other objects can be identified in the foreground?" }, { "from": "gpt", "value": "The answer is Two bicycles, an umbrella, and a rope" } ], "clip_score": 24.328125, "loss": 2.23858642578125 }, { "id": "identity_276105", "image": "UniGeo/images/3488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB is the diameter of circle O, point C is a point on the extension of AB, and OB equals BC. CP, CQ, and DE are tangents to circle O, with P and Q as the points of tangency. If the radius of circle O is 2, what is the perimeter of triangle CDE?\nChoices:\n(A) 4\n(B) 6\n(C) 4√3\n(D) 6√3" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.09375, "loss": 2.3150033950805664 }, { "id": "identity_180127", "image": "CLEVR-Math/images/CLEVR_train_054264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither a cylinder nor a cube?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.46875, "loss": 2.4791011810302734 }, { "id": "identity_99544", "image": "IconQA/images/30165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past the hour is it?" }, { "from": "gpt", "value": "The answer is 0 minutes" } ], "clip_score": 30.3125, "loss": 2.3861136436462402 }, { "id": "identity_44483", "image": "FigureQA/images/17603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bar with the lowest value?" }, { "from": "gpt", "value": "The answer is ForestGreen" } ], "clip_score": 25.3125, "loss": 5.184683799743652 }, { "id": "identity_125974", "image": "A-OKVQA/images/397325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a dog present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.515625, "loss": 3.222705125808716 }, { "id": "identity_70911", "image": "ScienceQA/images/3166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of ecosystem disturbance is depicted in the image?" }, { "from": "gpt", "value": "The answer is Fire disturbance" } ], "clip_score": 28.015625, "loss": 3.34806489944458 }, { "id": "identity_152647", "image": "ScienceQA/images/3086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the poles in Pair 2?" }, { "from": "gpt", "value": "The answer is South pole facing North pole" } ], "clip_score": 26.40625, "loss": 3.591066598892212 }, { "id": "identity_79134", "image": "Geometry3K/images/2019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In circle R, if SU = 20, YW = 20, and m∠YXZ = 45°, what is the length of arc ST?" }, { "from": "gpt", "value": "The answer is 10π" } ], "clip_score": 31.28125, "loss": 2.2132315635681152 }, { "id": "identity_292333", "image": "IconQA/images/28785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If there are 2 balls in the top row, how many balls are in the bottom row?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.53125, "loss": 2.3273401260375977 }, { "id": "identity_220821", "image": "VQA2.0/images/464736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the surfboards' design?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.1875, "loss": 3.831408739089966 }, { "id": "identity_4478", "image": "Geometry3K/images/1660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Suppose B D = 12 millimeters. Find the radius of the circle.\nChoices:\n(A) 6\n(B) 12\n(C) 18\n(D) 36" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.4375, "loss": 2.611588954925537 }, { "id": "identity_286118", "image": "PMC-VQA/images/PMC4592368_Fig1_429405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique offers a superior visualization of the WMH lesions among the following options? \n(A) T1-weighted MPRAGE images reformatted from sagittal to axial plane \n(B) Axial T2-weighted FLAIR images \n(C) T1-weighted MPRAGE images \n(D) Sagittal T2-weighted FLAIR images." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.96875, "loss": 3.3484129905700684 }, { "id": "identity_179302", "image": "CLEVR-Math/images/CLEVR_train_059351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.046875, "loss": 2.5346450805664062 }, { "id": "identity_139445", "image": "DVQA/images/bar_train_00159578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum number of people that prefer an object?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.9375, "loss": 2.1512291431427 }, { "id": "identity_147218", "image": "DVQA/images/bar_train_00126110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the second highest value in the 'hen' bar?" }, { "from": "gpt", "value": "The answer is size (green)" } ], "clip_score": 20.3125, "loss": 3.328261137008667 }, { "id": "identity_101658", "image": "IconQA/images/832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are neither blue nor circles?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 29.28125, "loss": 2.8405609130859375 }, { "id": "identity_122441", "image": "TextVQA/images/23733e0799cc2200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is written on the Android mascot's body?" }, { "from": "gpt", "value": "The answer is Android" } ], "clip_score": 33.53125, "loss": 3.547964572906494 }, { "id": "identity_272189", "image": "PlotQA/images/22074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After how many years does the value added by the agriculture sector exceed the average value added by the agriculture sector calculated over all years?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.5625, "loss": 2.667175531387329 }, { "id": "identity_199978", "image": "PMC-VQA/images/PMC7270124_Fig3_183801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of hemorrhage in the patient's brain in image A?" }, { "from": "gpt", "value": "The answer is No signs of hemorrhage" } ], "clip_score": 31.34375, "loss": 2.623699903488159 }, { "id": "identity_49701", "image": "FigureQA/images/98525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Peru at x-axis value 30?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 28.46875, "loss": 2.556205987930298 }, { "id": "identity_264644", "image": "ScienceQA/images/5374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlight among the following options: Saint Vincent and the Grenadines, Trinidad and Tobago, Dominica, or Grenada?\nChoices:\n(A) Saint Vincent and the Grenadines\n(B) Trinidad and Tobago\n(C) Dominica\n(D) Grenada" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.171875, "loss": 4.313817024230957 }, { "id": "identity_22748", "image": "Super-CLEVR/images/superCLEVR_new_001177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The metallic bus has what color?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 24.34375, "loss": 4.3601837158203125 }, { "id": "identity_48166", "image": "ChartQA/images/multi_col_372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest price for a soft drink and in which year?" }, { "from": "gpt", "value": "The answer is 4.32 US dollars in 2014" } ], "clip_score": 25.8125, "loss": 1.8909436464309692 }, { "id": "identity_134615", "image": "PlotQA/images/34581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage of self-employed workers between Latvia and Colombia in 2007?" }, { "from": "gpt", "value": "The answer is Approximately 35%" } ], "clip_score": 34.0, "loss": 2.410348653793335 }, { "id": "identity_62600", "image": "DocVQA/images/kqph0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For which date are the reservations requested?" }, { "from": "gpt", "value": "The answer is Sunday evening, September 15" } ], "clip_score": 19.953125, "loss": 3.2232069969177246 }, { "id": "identity_279962", "image": "GeoQA+/images/2554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, BD is the diameter of circle O, point A lies on the extension of BD, AC is a tangent to circle O at point C, and ∠A = 30°. What is the measure of ∠B?\nChoices:\n(A) 60°\n(B) 30°\n(C) 15°\n(D) 45°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.6875, "loss": 2.691072702407837 }, { "id": "identity_248529", "image": "VQA2.0/images/40138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of vehicles are visible in the image, and what are they?" }, { "from": "gpt", "value": "The answer is Two types: boat and train" } ], "clip_score": 26.390625, "loss": 3.5298514366149902 }, { "id": "identity_170880", "image": "TabMWP/images/12469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the aquarium sell the least number of tickets?" }, { "from": "gpt", "value": "The answer is Saturday" } ], "clip_score": 29.921875, "loss": 3.2651684284210205 }, { "id": "identity_64647", "image": "ScienceQA/images/907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique items does Wendy want that Reggie does not?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.078125, "loss": 2.424138069152832 }, { "id": "identity_180839", "image": "IconQA/images/104360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two more blue marbles are added to the group, how many marbles will there be in total?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.046875, "loss": 2.0291519165039062 }, { "id": "identity_287164", "image": "DocVQA/images/qxmp0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organization is Luis Mejia a member of?" }, { "from": "gpt", "value": "The answer is incap" } ], "clip_score": 16.6875, "loss": 5.339815139770508 }, { "id": "identity_330390", "image": "IconQA/images/677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show three activities Gabby did Monday morning. Which activity did Gabby do last?\nChoices:\n(A) made the bed\n(B) walked the dog\n(C) packed a lunch" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.625, "loss": 3.588381290435791 }, { "id": "identity_242511", "image": "VQA2.0/images/578705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image, and how many of them are interacting with the motorcycle?" }, { "from": "gpt", "value": "The answer is 4 people visible, 2 interacting with the motorcycle" } ], "clip_score": 26.875, "loss": 1.9588398933410645 }, { "id": "identity_194251", "image": "A-OKVQA/images/416981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a window treatment present in the kitchen?" }, { "from": "gpt", "value": "The answer is Yes, there are blinds" } ], "clip_score": 24.375, "loss": 3.0207924842834473 }, { "id": "identity_93012", "image": "IconQA/images/32647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line were to be extended by 2 centimeters, what would be its new length in centimeters?" }, { "from": "gpt", "value": "The answer is 8 centimeters" } ], "clip_score": 29.828125, "loss": 2.0257177352905273 }, { "id": "identity_267900", "image": "TabMWP/images/28505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Laura have sufficient funds to purchase both a bag of potato chips and a muffin with her $2.00?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.71875, "loss": 3.947791337966919 }, { "id": "identity_316772", "image": "VQA2.0/images/518649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What place is this person in?" }, { "from": "gpt", "value": "The answer is grosvenor st" } ], "clip_score": 15.9375, "loss": 5.11227560043335 }, { "id": "identity_5644", "image": "ScienceQA/images/19855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this time, thermal energy was transferred from () to ().\nChoices:\n(A) the surroundings . . . each refrigerator\n(B) each refrigerator . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.796875, "loss": 4.046876430511475 }, { "id": "identity_336744", "image": "A-OKVQA/images/402967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of building is shown here?\nChoices:\n(A) cafe\n(B) pagoda\n(C) pavilion\n(D) barn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.15625, "loss": 4.341344356536865 }, { "id": "identity_131488", "image": "VQA2.0/images/330292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main activity taking place in the image?" }, { "from": "gpt", "value": "The answer is Kite flying" } ], "clip_score": 23.15625, "loss": 3.0594563484191895 }, { "id": "identity_298620", "image": "TextVQA/images/d6959ca3b63567f4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of shoes is the sponsor of this event?" }, { "from": "gpt", "value": "The answer is adidas" } ], "clip_score": 20.375, "loss": 2.634780168533325 }, { "id": "identity_184626", "image": "PMC-VQA/images/PMC6211411_Fig1_389260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is predominantly featured in this image?" }, { "from": "gpt", "value": "The answer is The aorta and its major branches" } ], "clip_score": 21.375, "loss": 2.8568363189697266 }, { "id": "identity_164059", "image": "GeoQA+/images/3866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AC = 6 and AD = 2, what is the ratio of the area of triangle ADM to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 1/9" } ], "clip_score": 31.3125, "loss": 1.767075777053833 }, { "id": "identity_1994", "image": "FigureQA/images/42529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Blue the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.7109375, "loss": 3.80000376701355 }, { "id": "identity_274821", "image": "FigureQA/images/80840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tan the one with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.859375, "loss": 4.450069904327393 }, { "id": "identity_187125", "image": "PMC-VQA/images/PMC5076298_f3_87031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any abnormalities visible in this image?" }, { "from": "gpt", "value": "The answer is No visible abnormalities" } ], "clip_score": 28.703125, "loss": 3.233304738998413 }, { "id": "identity_263023", "image": "UniGeo/images/2629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, in quadrilateral ABCD, if angle B is 40° larger than angle A, what is the measure of angle D?\nChoices:\n(A) 60°\n(B) 70°\n(C) 100°\n(D) 110°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.46875, "loss": 2.4019651412963867 }, { "id": "identity_219566", "image": "VQA2.0/images/56896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the curtains?" }, { "from": "gpt", "value": "The answer is Cartoon character faces" } ], "clip_score": 26.34375, "loss": 3.903334140777588 }, { "id": "identity_3022", "image": "TextVQA/images/b861e0309857bf11.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what company are those storage units from?" }, { "from": "gpt", "value": "The answer is mol" } ], "clip_score": 12.84375, "loss": 4.476870059967041 }, { "id": "identity_194471", "image": "A-OKVQA/images/79869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toy figures can be seen on the desk?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 24.40625, "loss": 2.3922688961029053 }, { "id": "identity_90725", "image": "GeoQA+/images/2041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, if the radius of the circle is 5 units, what is the length of AB?" }, { "from": "gpt", "value": "The answer is √125 or approximately 11.18 units" } ], "clip_score": 31.6875, "loss": 1.5587831735610962 }, { "id": "identity_179005", "image": "TabMWP/images/28008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many members planted an even number of seeds?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.3125, "loss": 2.4419519901275635 }, { "id": "identity_189998", "image": "TextVQA/images/d3bdf740a9533be9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tent in the middle of the image?" }, { "from": "gpt", "value": "The answer is Blue and white" } ], "clip_score": 24.15625, "loss": 3.822481393814087 }, { "id": "identity_41004", "image": "FigureQA/images/73173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Dim Gray have a consistent value across the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.671875, "loss": 3.0028908252716064 }, { "id": "identity_292682", "image": "TabMWP/images/27007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the rate of change between 2019 and 2020 in the time taken by campers at Luke's summer camp to finish the obstacle course, as indicated in the table?" }, { "from": "gpt", "value": "The answer is -2" } ], "clip_score": 30.8125, "loss": 3.839977979660034 }, { "id": "identity_211021", "image": "A-OKVQA/images/170700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the umbrellas on the patio?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.125, "loss": 3.405500888824463 }, { "id": "identity_222395", "image": "Super-CLEVR/images/superCLEVR_new_001938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more buses than cars in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.0625, "loss": 3.0996651649475098 }, { "id": "identity_322492", "image": "VQA2.0/images/217341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this the process of doing something with a horse?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.34375, "loss": 3.325559139251709 }, { "id": "identity_110017", "image": "A-OKVQA/images/448547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the horses in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.46875, "loss": 3.4204628467559814 }, { "id": "identity_40273", "image": "FigureQA/images/80821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Maroon line intersect the Steel Blue line?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.09375, "loss": 2.2612483501434326 }, { "id": "identity_310827", "image": "DocVQA/images/sngf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the street address of this organization?" }, { "from": "gpt", "value": "The answer is 1790 Broadway" } ], "clip_score": 19.96875, "loss": 2.4187614917755127 }, { "id": "identity_201405", "image": "PMC-VQA/images/PMC7764378_brainsci-10-00957-f003_328653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of hydrocephalus in this MRI image?" }, { "from": "gpt", "value": "The answer is No signs of hydrocephalus" } ], "clip_score": 36.5, "loss": 2.5250325202941895 }, { "id": "identity_89510", "image": "UniGeo/images/3388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABD in the given diagram?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 30.796875, "loss": 1.6782622337341309 }, { "id": "identity_9048", "image": "ScienceQA/images/8735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of West Virginia?\nChoices:\n(A) Birmingham\n(B) Baton Rouge\n(C) Huntington\n(D) Charleston" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.28125, "loss": 3.7248008251190186 }, { "id": "identity_97916", "image": "CLEVR-Math/images/CLEVR_train_061841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the golden sphere represents the sun, which object could represent the Earth in terms of relative size?" }, { "from": "gpt", "value": "The answer is The red cube" } ], "clip_score": 24.390625, "loss": 4.097790718078613 }, { "id": "identity_150028", "image": "DocVQA/images/jggn0226_45.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of review written at the top right corner of the page?" }, { "from": "gpt", "value": "The answer is 10-29-01" } ], "clip_score": 21.875, "loss": 2.0357930660247803 }, { "id": "identity_336009", "image": "PMC-VQA/images/PMC5763060_fig3_262113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part is impacted by posterior SCJ dislocation?\nChoices:\n(A) Ankle \n(B) Elbow \n(C) Spine \n(D) Shoulder" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.734375, "loss": 3.6460413932800293 }, { "id": "identity_90731", "image": "GeoQA+/images/327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BOD?" }, { "from": "gpt", "value": "The answer is 15 degrees" } ], "clip_score": 32.15625, "loss": 2.135577440261841 }, { "id": "identity_141783", "image": "FigureQA/images/59943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the Chocolate bar?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 21.609375, "loss": 2.5099780559539795 }, { "id": "identity_258477", "image": "TextVQA/images/a5799979b3158c6b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of all the brands or entities mentioned on the rail, and which one is the manufacturer?" }, { "from": "gpt", "value": "The answer is MINI, LINZAG, SHAPE; manufacturer is LINZAG" } ], "clip_score": 19.109375, "loss": 2.3978867530822754 }, { "id": "identity_288257", "image": "TabMWP/images/830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of baseballs hit by the player in practice each day?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.71875, "loss": 2.700113534927368 }, { "id": "identity_285718", "image": "PMC-VQA/images/PMC4503870_Fig5_406410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most suitable imaging view for determining the optimal distance for the MitraClip procedure?\nChoices:\n(A) Short axis view\n(B) Long axis view\n(C) Four-chamber view\n(D) Two-chamber view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5, "loss": 4.527661323547363 }, { "id": "identity_17821", "image": "IconQA/images/27218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.4375, "loss": 4.612637519836426 }, { "id": "identity_151986", "image": "ScienceQA/images/8875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How can you tell the log was once part of a living tree?" }, { "from": "gpt", "value": "The answer is Presence of growth rings and sprouting leaf" } ], "clip_score": 35.46875, "loss": 3.0606491565704346 }, { "id": "identity_117879", "image": "VQA2.0/images/456755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of toy construction is located to the immediate right of the elephant?" }, { "from": "gpt", "value": "The answer is A toy train set" } ], "clip_score": 32.0, "loss": 3.0226352214813232 }, { "id": "identity_200853", "image": "A-OKVQA/images/147577.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation is predominantly featured in the mural?" }, { "from": "gpt", "value": "The answer is Train" } ], "clip_score": 31.203125, "loss": 4.1727800369262695 }, { "id": "identity_238761", "image": "TabMWP/images/18672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pages in all magazines that have at least 34 pages?" }, { "from": "gpt", "value": "The answer is 1526" } ], "clip_score": 23.421875, "loss": 2.278282880783081 }, { "id": "identity_10117", "image": "GeoQA+/images/2170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,C,D是⊙O上的点,∠CDB=30°,过点C作⊙O的切线交AB的延长线于E,则sinE的值为()\nChoices:\n(A) \\frac{1}{2}\n(B) \\frac{√{2}}{2}\n(C) \\frac{√{3}}{2}\n(D) \\frac{√{3}}{3}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.359375, "loss": 2.5970640182495117 }, { "id": "identity_150840", "image": "TQA/images/question_images/fossils_6895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the three types of fossil fuels depicted in the image?" }, { "from": "gpt", "value": "The answer is Coal, Oil, Natural Gas" } ], "clip_score": 27.65625, "loss": 1.7406635284423828 }, { "id": "identity_27156", "image": "PMC-VQA/images/PMC4195844_Fig10_327545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of scoliosis is presented in the images? \nChoices:\n(A) Lumbar scoliosis \n(B) Idiopathic scoliosis \n(C) Congenital scoliosis \n(D) Kyphoscoliosis " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.46875, "loss": 3.9582138061523438 }, { "id": "identity_186866", "image": "A-OKVQA/images/61452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the horse pulling?" }, { "from": "gpt", "value": "The answer is Wagon" } ], "clip_score": 28.28125, "loss": 3.239248514175415 }, { "id": "identity_57849", "image": "DVQA/images/bar_train_00082242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the 'tone' color for 'truck'?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 25.46875, "loss": 2.5320119857788086 }, { "id": "identity_309009", "image": "ScienceQA/images/535.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is in column 1?\nChoices:\n(A) the fast-food restaurant\n(B) the police department\n(C) the grocery store\n(D) the fire department" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.015625, "loss": 3.6832473278045654 }, { "id": "identity_129225", "image": "VizWiz/images/VizWiz_train_00015309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the net weight of the sauce in the bottle?" }, { "from": "gpt", "value": "The answer is 350 grams or 12.34 ounces" } ], "clip_score": 26.515625, "loss": 1.3791487216949463 }, { "id": "identity_292568", "image": "TextVQA/images/ef5b03d3ec810a0f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of vodka is available on the shelf?" }, { "from": "gpt", "value": "The answer is grey goose,ketel one" } ], "clip_score": 28.984375, "loss": 3.916250705718994 }, { "id": "identity_257806", "image": "ChartQA/images/10799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the maximum value of conservative and the minimum value of liberal across all groups?" }, { "from": "gpt", "value": "The answer is 64" } ], "clip_score": 16.09375, "loss": 2.4306089878082275 }, { "id": "identity_21059", "image": "PMC-VQA/images/PMC5728059_Fig4_252343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the infarct located? \nChoices:\n(A) Left Pons \n(B) Right Pons \n(C) Cerebellum \n(D) Occipital Lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.703125, "loss": 4.342491149902344 }, { "id": "identity_180868", "image": "TabMWP/images/8457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more baskets did Harper make on her best day compared to her worst day?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.59375, "loss": 2.865971803665161 }, { "id": "identity_30272", "image": "VQA2.0/images/316194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the larger bus?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 19.90625, "loss": 3.3281357288360596 }, { "id": "identity_188215", "image": "PMC-VQA/images/PMC9023168_fig5_257746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the image on the far right?" }, { "from": "gpt", "value": "The answer is CXCL14" } ], "clip_score": 23.265625, "loss": 1.916645884513855 }, { "id": "identity_215022", "image": "TextVQA/images/114ffcdef333cf08.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is prominently featured in the image with an advertisement?" }, { "from": "gpt", "value": "The answer is A double-decker bus" } ], "clip_score": 31.09375, "loss": 2.3780829906463623 }, { "id": "identity_137336", "image": "DocVQA/images/rlvm0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the project?" }, { "from": "gpt", "value": "The answer is Focus on land use form and environmental quality relationship" } ], "clip_score": 24.921875, "loss": 3.3411953449249268 }, { "id": "identity_136635", "image": "DVQA/images/bar_train_00091882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between 'fool' and 'guard' in the 'fate' category?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.4375, "loss": 2.578906297683716 }, { "id": "identity_151864", "image": "ScienceQA/images/17320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 27.734375, "loss": 2.7735233306884766 }, { "id": "identity_306117", "image": "VizWiz/images/VizWiz_train_00015009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this image, please?" }, { "from": "gpt", "value": "The answer is hot chocolate marshmallows" } ], "clip_score": 27.9375, "loss": 2.538201332092285 }, { "id": "identity_11207", "image": "TextVQA/images/49ff319bac6d6611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand of computers are in the white boxes?" }, { "from": "gpt", "value": "The answer is combo" } ], "clip_score": 20.828125, "loss": 4.461306571960449 }, { "id": "identity_305392", "image": "PMC-VQA/images/PMC4897665_fig1_39000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging view used to capture this? \nChoices:\n(A) Apical four-chamber view \n(B) Subcostal view \n(C) Parasternal long-axis view \n(D) Suprasternal view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.796875, "loss": 3.9861156940460205 }, { "id": "identity_301379", "image": "FigureQA/images/16473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the highest median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.6875, "loss": 4.418432712554932 }, { "id": "identity_28651", "image": "TextVQA/images/142076607dbdfa59.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is that plane part of the star alliance?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 32.0625, "loss": 3.4545493125915527 }, { "id": "identity_171504", "image": "TabMWP/images/26568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a price of $675, is there a shortage or a surplus?" }, { "from": "gpt", "value": "The answer is Shortage" } ], "clip_score": 25.359375, "loss": 2.5584309101104736 }, { "id": "identity_174628", "image": "IconQA/images/68514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you color one side of each shape, how many sides will remain uncolored?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 31.53125, "loss": 2.37300443649292 }, { "id": "identity_111262", "image": "TextVQA/images/07e17d71837d0f5b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cap on the Fanta bottles?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.84375, "loss": 3.3888492584228516 }, { "id": "identity_312381", "image": "PMC-VQA/images/PMC2768639_F0002_49139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the pattern observed in this scan?\nChoices:\n(A) Peripheral cold areas and central FDG concentration \n(B) Peripheral FDG concentration and central cold areas \n(C) Peripheral hot areas and central cold areas \n(D) Peripheral cold areas and central hot areas" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.84375, "loss": 4.115065097808838 }, { "id": "identity_206675", "image": "PMC-VQA/images/PMC7906016_F3_374092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the brain is the red arrow pointing to?" }, { "from": "gpt", "value": "The answer is Left side" } ], "clip_score": 26.953125, "loss": 3.5741770267486572 }, { "id": "identity_214604", "image": "TextVQA/images/04df3cbd8345f25f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the gauge in the image?" }, { "from": "gpt", "value": "The answer is It indicates speed or level in machinery" } ], "clip_score": 26.40625, "loss": 3.695972204208374 }, { "id": "identity_178235", "image": "TabMWP/images/2479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many parks have more than 4 basketball hoops?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 37.15625, "loss": 2.3812673091888428 }, { "id": "identity_202251", "image": "A-OKVQA/images/422877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the closest car to the camera?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 24.8125, "loss": 4.411124229431152 }, { "id": "identity_69438", "image": "AI2D/images/1169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"Centrioles\"?" }, { "from": "gpt", "value": "The answer is Involved in cell division" } ], "clip_score": 25.234375, "loss": 2.222825765609741 }, { "id": "identity_81004", "image": "UniGeo/images/1519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AD is parallel to BE and CF, and the lines l1 and l2 intersect the parallel lines at points A, B, C and D, E, F respectively, and DE = 6, what is the length of EF?" }, { "from": "gpt", "value": "The answer is EF = DE = 6" } ], "clip_score": 34.75, "loss": 2.0605006217956543 }, { "id": "identity_279351", "image": "GeoQA+/images/1943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given figure where line AB is parallel to line CD, and line EF intersects lines AB and CD at points G and H, if angle 1 is 105°, what is the measure of angle 2? \n(A) 45°\n(B) 55°\n(C) 65°\n(D) 75°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.125, "loss": 2.4676761627197266 }, { "id": "identity_312917", "image": "GeoQA+/images/1258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AD∥BE∥CF,直线l_{1},l_{2}与这些平行线分别交于点A,B,C和点D,E,F,=,DE=6,则DF的值为()\nChoices:\n(A) 4\n(B) 9\n(C) 10\n(D) 15" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.28125, "loss": 2.3815224170684814 }, { "id": "identity_251348", "image": "DocVQA/images/rzcd0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full title and date of the event mentioned in the document?" }, { "from": "gpt", "value": "The answer is Eighteenth Annual Meeting, December 5-6, 1968" } ], "clip_score": 30.859375, "loss": 1.3416943550109863 }, { "id": "identity_218172", "image": "A-OKVQA/images/310808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the boat closest to the dock?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.109375, "loss": 2.23245906829834 }, { "id": "identity_247183", "image": "VizWiz/images/VizWiz_train_00004219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are placed on the sink, and what are their colors?" }, { "from": "gpt", "value": "The answer is Three items: red and white, silver, clear with white" } ], "clip_score": 28.71875, "loss": 4.066136837005615 }, { "id": "identity_228882", "image": "PMC-VQA/images/PMC6371611_Fig2_434553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which areas are affected by the opacification according to the image, and what is the relative intensity of opacification in each affected area?" }, { "from": "gpt", "value": "The answer is Right ICA is affected with high intensity of opacification" } ], "clip_score": 26.015625, "loss": 3.676774501800537 }, { "id": "identity_230728", "image": "PMC-VQA/images/PMC2492868_F6_26128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in the Western blot technique used to obtain the results presented in the figure, and what is the purpose of each step?" }, { "from": "gpt", "value": "The answer is Sample preparation, protein quantification, gel electrophoresis, transfer, blocking, primary antibody incubation, secondary antibody incubation, detection" } ], "clip_score": 20.375, "loss": 1.7442795038223267 }, { "id": "identity_4877", "image": "UniGeo/images/1093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 已知如图,四边形ABCD内接于⊙O,若∠BOD=100°,则∠BCD的度数是()\nChoices:\n(A) 50°\n(B) 80°\n(C) 100°\n(D) 130°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.5, "loss": 2.363076686859131 }, { "id": "identity_259460", "image": "TabMWP/images/18362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean number of math teachers across all high schools, and how does it compare to the median number of math teachers?" }, { "from": "gpt", "value": "The answer is Mean is 8.8, median is 11" } ], "clip_score": 32.75, "loss": 2.40983510017395 }, { "id": "identity_287051", "image": "VQA-AS/images/1024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason behind the people clutching their chests?" }, { "from": "gpt", "value": "The answer is they are laughing" } ], "clip_score": 22.6875, "loss": 2.504690647125244 }, { "id": "identity_295788", "image": "MapQA/images/map_12330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the legend represented by a continuous bar?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 12.0625, "loss": 3.3795228004455566 }, { "id": "identity_299964", "image": "GeoQA+/images/5959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that points A, B, C, and D lie on circle O, with AB and CD intersecting at an external point E, ∠BCD is 25°, and ∠E is 39°. What is the degree measure of ∠ADC?\nChoices:\n(A) 64°\n(B) 65°\n(C) 51°\n(D) 54°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.4375, "loss": 2.1834824085235596 }, { "id": "identity_282044", "image": "VQA2.0/images/196061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of horses that you see?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 16.6875, "loss": 1.7404371500015259 }, { "id": "identity_134575", "image": "DocVQA/images/lkbk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What city is the Monell Chemical Senses Center located in?" }, { "from": "gpt", "value": "The answer is Philadelphia, Pennsylvania" } ], "clip_score": 20.71875, "loss": 2.6042068004608154 }, { "id": "identity_320337", "image": "DVQA/images/bar_train_00190847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the lowest bar in each group?" }, { "from": "gpt", "value": "The answer is rise" } ], "clip_score": 21.796875, "loss": 3.15903902053833 }, { "id": "identity_251074", "image": "ChartQA/images/multi_col_60265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the Q1 2014 and Q1 2013 values for the sector with the largest Q1 2014 value?" }, { "from": "gpt", "value": "The answer is 0.65" } ], "clip_score": 27.28125, "loss": 2.2310330867767334 }, { "id": "identity_235933", "image": "IconQA/images/62712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines of symmetry does the object have, and where are they located?" }, { "from": "gpt", "value": "The answer is 0 lines of symmetry" } ], "clip_score": 25.5, "loss": 3.060696840286255 }, { "id": "identity_114759", "image": "A-OKVQA/images/231379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is parked on the side of the road?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 18.90625, "loss": 2.440037488937378 }, { "id": "identity_184379", "image": "A-OKVQA/images/535770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the curtains in the room?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 18.484375, "loss": 3.5409939289093018 }, { "id": "identity_182434", "image": "A-OKVQA/images/106661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 27.203125, "loss": 2.2794368267059326 }, { "id": "identity_172956", "image": "IconQA/images/39190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper clips long is the line if each paper clip is aligned end to end without overlapping?" }, { "from": "gpt", "value": "The answer is 3 paper clips" } ], "clip_score": 29.140625, "loss": 1.6000994443893433 }, { "id": "identity_193306", "image": "PMC-VQA/images/PMC6813448_mbo3894-fig-0005_56317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lichen in the image a single organism or a colony of multiple organisms?" }, { "from": "gpt", "value": "The answer is Single organism" } ], "clip_score": 30.0625, "loss": 2.857196807861328 }, { "id": "identity_291777", "image": "VQA2.0/images/559633.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animal is located behind the fence?" }, { "from": "gpt", "value": "The answer is horse" } ], "clip_score": 24.703125, "loss": 4.1632914543151855 }, { "id": "identity_138691", "image": "DVQA/images/bar_train_00066120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is preferred by exactly 5 people in the \"hill\" category?" }, { "from": "gpt", "value": "The answer is ballet" } ], "clip_score": 32.65625, "loss": 2.7199409008026123 }, { "id": "identity_308672", "image": "DocVQA/images/hjpj0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What contributes minimally to the average total intake of caffeine?" }, { "from": "gpt", "value": "The answer is soft drinks" } ], "clip_score": 23.671875, "loss": 2.71264910697937 }, { "id": "identity_97648", "image": "IconQA/images/102160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one petal from each flower were removed, how many petals would remain?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 30.1875, "loss": 1.965404748916626 }, { "id": "identity_123482", "image": "A-OKVQA/images/208963.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the headband worn by the person on the right?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 12.53125, "loss": 3.5848050117492676 }, { "id": "identity_68483", "image": "TQA/images/question_images/periodic_table_8161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the electron configuration of the element with the symbol 'Ar'?" }, { "from": "gpt", "value": "The answer is Ne]3s2 3p6" } ], "clip_score": 23.984375, "loss": 2.126605749130249 }, { "id": "identity_227967", "image": "GeoQA+/images/991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O直径,C是AB延长线上一点,CD是⊙O的切线,D为切点,∠A=30°,AB=2cm,求△ACD的面积。" }, { "from": "gpt", "value": "The answer is 1/2 * x * √3 cm" } ], "clip_score": 26.140625, "loss": 2.91455078125 }, { "id": "identity_237219", "image": "FigureQA/images/51635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Gray and Medium Aqua compared to Forest Green?" }, { "from": "gpt", "value": "The answer is Less than Forest Green" } ], "clip_score": 35.5625, "loss": 3.4770314693450928 }, { "id": "identity_194069", "image": "A-OKVQA/images/496362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus in the background?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.78125, "loss": 3.558445692062378 }, { "id": "identity_229350", "image": "AI2D/images/3055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain the role of the mitochondrion in cellular respiration and describe the three main stages of this process" }, { "from": "gpt", "value": "The answer is Produces ATP through glycolysis, Krebs cycle, and electron transport chain" } ], "clip_score": 23.140625, "loss": 2.172882080078125 }, { "id": "identity_328930", "image": "TabMWP/images/12480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Joel entered his sister into their town's Smelly Shoe Competition. Joel observed contestants of all ages proudly strutting around wearing different kinds of smelly shoes. What is the probability that a randomly selected contestant is wearing some tennis shoes and is under age 12? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 5/16" } ], "clip_score": 24.265625, "loss": 1.7015585899353027 }, { "id": "identity_162831", "image": "Geometry3K/images/671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle PQS?" }, { "from": "gpt", "value": "The answer is 20 units (assuming PS = 10 units)" } ], "clip_score": 34.75, "loss": 2.071049928665161 }, { "id": "identity_274297", "image": "FigureQA/images/48467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red the median with a high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.4375, "loss": 4.532970428466797 }, { "id": "identity_148331", "image": "DVQA/images/bar_train_00143427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has a higher preference for jackets?" }, { "from": "gpt", "value": "The answer is killer" } ], "clip_score": 32.75, "loss": 3.485797643661499 }, { "id": "identity_190655", "image": "A-OKVQA/images/513145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the umbrella held by the individual in the center?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.296875, "loss": 3.6113827228546143 }, { "id": "identity_54966", "image": "DVQA/images/bar_train_00181337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy on the 'dark' dataset?" }, { "from": "gpt", "value": "The answer is energy" } ], "clip_score": 32.90625, "loss": 3.5493998527526855 }, { "id": "identity_250101", "image": "A-OKVQA/images/302076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what are three indicators that suggest a \"shoes off\" rule is in place, and why might this rule be implemented in this setting?" }, { "from": "gpt", "value": "The answer is Cleanliness, light-colored carpet, luxury decor" } ], "clip_score": 23.78125, "loss": 3.130500555038452 }, { "id": "identity_97870", "image": "IconQA/images/96574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bugs have stripes on their back?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.90625, "loss": 2.461383819580078 }, { "id": "identity_206386", "image": "TextVQA/images/253ce025df6ab8c7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main language used in the text of the poster?" }, { "from": "gpt", "value": "The answer is German" } ], "clip_score": 18.0, "loss": 3.688307285308838 }, { "id": "identity_9474", "image": "FigureQA/images/24369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.34375, "loss": 4.04938268661499 }, { "id": "identity_321309", "image": "TextVQA/images/28078b0f13f24371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which hotel sponsors this place?" }, { "from": "gpt", "value": "The answer is terrace" } ], "clip_score": 13.3828125, "loss": 4.071140289306641 }, { "id": "identity_86244", "image": "UniGeo/images/2512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠AOB = 70°, what is the degree measure of the minor arc AB?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 34.21875, "loss": 1.8779656887054443 }, { "id": "identity_111997", "image": "A-OKVQA/images/270193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the train moving or stationary?" }, { "from": "gpt", "value": "The answer is Stationary" } ], "clip_score": 19.109375, "loss": 3.036273717880249 }, { "id": "identity_176442", "image": "IconQA/images/1396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are neither green nor triangles?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.3125, "loss": 2.497663974761963 }, { "id": "identity_36538", "image": "TabMWP/images/31127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A front desk clerk looked up how many guests the hotel had each day. What is the mean of the numbers?" }, { "from": "gpt", "value": "The answer is 91" } ], "clip_score": 33.0, "loss": 2.666785955429077 }, { "id": "identity_296420", "image": "PMC-VQA/images/PMC3200069_fig1_112904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In Case 3, where are the radio-opaque shadows visible?\nChoices:\n(A) Right kidney\n(B) Left kidney\n(C) Liver\n(D) Spleen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.8125, "loss": 4.30648136138916 }, { "id": "identity_172969", "image": "TabMWP/images/4216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average amount of funds raised per team?" }, { "from": "gpt", "value": "The answer is $614" } ], "clip_score": 28.75, "loss": 2.6502151489257812 }, { "id": "identity_102437", "image": "IconQA/images/86625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of animals shown in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.5, "loss": 1.8079862594604492 }, { "id": "identity_298281", "image": "VQA2.0/images/313798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals visible to you?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 10.484375, "loss": 3.344837188720703 }, { "id": "identity_51417", "image": "DocVQA/images/tlnf0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of expenses for research during the year?" }, { "from": "gpt", "value": "The answer is $4,500,000" } ], "clip_score": 14.6015625, "loss": 1.5100853443145752 }, { "id": "identity_206079", "image": "VQA2.0/images/390792.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many surfboards are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.359375, "loss": 2.240039587020874 }, { "id": "identity_96933", "image": "CLEVR-Math/images/CLEVR_train_008353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more shiny sphere is added, how many shiny objects will there be?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.28125, "loss": 2.2353429794311523 }, { "id": "identity_220373", "image": "VQA2.0/images/512292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant hair color in the images?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 27.25, "loss": 3.7990238666534424 }, { "id": "identity_288585", "image": "ScienceQA/images/12921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic is shared by both of these objects?\nChoices:\n(A) colorful\n(B) stretchy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.453125, "loss": 4.353853702545166 }, { "id": "identity_183149", "image": "PMC-VQA/images/PMC5686666_F2_239743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measurement indicated by the red line in the echocardiographic image?" }, { "from": "gpt", "value": "The answer is Width of the mitral regurgitation jet" } ], "clip_score": 35.375, "loss": 2.911400318145752 }, { "id": "identity_116192", "image": "VQA-RAD/images/synpic32081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a pleural effusion in this image?" }, { "from": "gpt", "value": "The answer is No evidence of pleural effusion" } ], "clip_score": 33.09375, "loss": 2.596367835998535 }, { "id": "identity_331930", "image": "TextVQA/images/699f316f0bddb213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sign is written in black letters on this wall?" }, { "from": "gpt", "value": "The answer is registers" } ], "clip_score": 13.9296875, "loss": 3.674884080886841 }, { "id": "identity_245524", "image": "ChartQA/images/two_col_42872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average unemployment rate from 2010 to 2015?" }, { "from": "gpt", "value": "The answer is 17.98%" } ], "clip_score": 29.53125, "loss": 1.7567237615585327 }, { "id": "identity_257668", "image": "DVQA/images/bar_train_00163912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of all bar values in groups that contain at least one bar with a value smaller than 8?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 24.703125, "loss": 2.194148540496826 }, { "id": "identity_119974", "image": "PMC-VQA/images/PMC6182710_Fig4_381212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image an X-ray or another type of medical imaging?" }, { "from": "gpt", "value": "The answer is X-ray" } ], "clip_score": 29.53125, "loss": 2.011812448501587 }, { "id": "identity_67908", "image": "TQA/images/question_images/human_system_circulatory_3622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Capillaries\" in the lung?" }, { "from": "gpt", "value": "The answer is Gas exchange" } ], "clip_score": 28.53125, "loss": 3.1175155639648438 }, { "id": "identity_134739", "image": "FigureQA/images/31731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Dark Turquoise and Light Slate?" }, { "from": "gpt", "value": "The answer is Approximately 50%" } ], "clip_score": 32.625, "loss": 2.4340524673461914 }, { "id": "identity_318755", "image": "UniGeo/images/2875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC的顶点都在⊙O上,连接AO、BO,若∠OAB=50°,则∠ACB的大小是()\nChoices:\n(A) 25°\n(B) 40°\n(C) 50°\n(D) 80°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.421875, "loss": 2.5713796615600586 }, { "id": "identity_20721", "image": "GeoQA+/images/4480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点F在平行四边形ABCD的边CD上,且\\frac{CF}{AB}=\\frac{2}{3},连接BF并延长交AD的延长线于点E,则\\frac{DE}{BC}的值为()\nChoices:\n(A) \\frac{1}{3}\n(B) \\frac{2}{3}\n(C) \\frac{1}{2}\n(D) \\frac{2}{5}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.015625, "loss": 2.4389262199401855 }, { "id": "identity_59191", "image": "FigureQA/images/19558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of the Periwinkle section in the pie chart?" }, { "from": "gpt", "value": "The answer is 0.44 or 44%" } ], "clip_score": 34.5625, "loss": 1.4565290212631226 }, { "id": "identity_240787", "image": "ScienceQA/images/6328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which empire controlled Mesopotamia immediately before the Babylonian Empire started ruling in ca. 1792 BCE, and how many years did this empire rule Mesopotamia?" }, { "from": "gpt", "value": "The answer is Neo-Sumerian Empire, 96 years" } ], "clip_score": 36.5, "loss": 1.8477909564971924 }, { "id": "identity_275837", "image": "DocVQA/images/qsjw0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the individual speaking in the video titled \"A View of 'Fess' in the Laboratory\"?" }, { "from": "gpt", "value": "The answer is Maclyn McCarty" } ], "clip_score": 23.109375, "loss": 3.1060237884521484 }, { "id": "identity_232103", "image": "Super-CLEVR/images/superCLEVR_new_001915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image share the same color as the shiny aeroplane, and what are their shapes?" }, { "from": "gpt", "value": "The answer is Two objects, SUV and toy plane" } ], "clip_score": 31.671875, "loss": 4.1440348625183105 }, { "id": "identity_176515", "image": "IconQA/images/97603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this shape also known as a hexahedron?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.90625, "loss": 3.290147066116333 }, { "id": "identity_99083", "image": "TabMWP/images/8303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of train tickets sold over the 4 days?" }, { "from": "gpt", "value": "The answer is 3927" } ], "clip_score": 37.8125, "loss": 2.421989917755127 }, { "id": "identity_189150", "image": "VQA-AS/images/17458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 25.078125, "loss": 4.42186164855957 }, { "id": "identity_104039", "image": "VQA2.0/images/304997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the uniforms of the team in the field?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.53125, "loss": 3.609133243560791 }, { "id": "identity_104094", "image": "VQA-AS/images/429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the curtains in the window?" }, { "from": "gpt", "value": "The answer is White with red and yellow flowers" } ], "clip_score": 24.421875, "loss": 3.2685673236846924 }, { "id": "identity_37159", "image": "PMC-VQA/images/PMC5893487_Fig21_299403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images were taken of the woman with endometriosis of the rectal wall? \nChoices:\n(A) Oblique sagittal T1-weighted images. \n(B) Axial T2-weighted images. \n(C) Coronal T1-weighted images. \n(D) Oblique coronal T2-weighted images. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.625, "loss": 3.143991231918335 }, { "id": "identity_29783", "image": "DVQA/images/bar_train_00164174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did the item law in the store dealer sold smaller units than the item shot in the store car?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 27.96875, "loss": 3.400871992111206 }, { "id": "identity_302444", "image": "FigureQA/images/81066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Web Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.0, "loss": 4.051346302032471 }, { "id": "identity_114532", "image": "A-OKVQA/images/130339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the giraffes' fur?" }, { "from": "gpt", "value": "The answer is Spotted or patchy" } ], "clip_score": 29.890625, "loss": 2.997586727142334 }, { "id": "identity_162354", "image": "UniGeo/images/2986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O, CD is tangent to circle O at point D, and angle A is 25 degrees, what is the measure of angle BOD?" }, { "from": "gpt", "value": "The answer is 25 degrees" } ], "clip_score": 35.8125, "loss": 1.637678623199463 }, { "id": "identity_332367", "image": "A-OKVQA/images/195561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does this time of year occur?\nChoices:\n(A) spring\n(B) summer\n(C) fall\n(D) winter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.3203125, "loss": 4.166150093078613 }, { "id": "identity_58116", "image": "ChartQA/images/two_col_45.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of mass shooters across all ethnicities?" }, { "from": "gpt", "value": "The answer is 17.57" } ], "clip_score": 25.84375, "loss": 1.9547456502914429 }, { "id": "identity_11931", "image": "UniGeo/images/2116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 三个等边三角形的摆放位置如图,若∠3=60°,则∠1+∠2的度数为()\nChoices:\n(A) 90°\n(B) 120°\n(C) 270°\n(D) 360°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.90625, "loss": 2.9704480171203613 }, { "id": "identity_299751", "image": "UniGeo/images/3263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, it is known that AB, CD, and EF are all perpendicular to BD, with the feet of the perpendiculars being B, D, and F respectively. If AB = 4 and CD = 12, what is the length of EF?\nChoices:\n(A) 2\n(B) 2.5\n(C) 3\n(D) 2.8" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.78125, "loss": 2.22837495803833 }, { "id": "identity_119888", "image": "VQA2.0/images/516771.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many security personnel can be seen in the image?" }, { "from": "gpt", "value": "The answer is There are two security personnel" } ], "clip_score": 18.1875, "loss": 3.1526401042938232 }, { "id": "identity_298615", "image": "VQA2.0/images/14966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the man performing a magic trick?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.8203125, "loss": 4.481668949127197 }, { "id": "identity_238566", "image": "DVQA/images/bar_train_00086227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold across all stores for items that sold more than 1 unit in at least one store?" }, { "from": "gpt", "value": "The answer is 52.5 units" } ], "clip_score": 30.0625, "loss": 1.8757530450820923 }, { "id": "identity_183145", "image": "PMC-VQA/images/PMC5686666_F2_239743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the abnormality labeled in the echocardiographic image?" }, { "from": "gpt", "value": "The answer is Mitral Regurgitation" } ], "clip_score": 35.71875, "loss": 1.9511585235595703 }, { "id": "identity_59667", "image": "FigureQA/images/9585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Crimson section represent more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 34.3125, "loss": 3.266718626022339 }, { "id": "identity_94437", "image": "IconQA/images/59069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many submarines are there in total?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.921875, "loss": 2.4361326694488525 }, { "id": "identity_59990", "image": "PlotQA/images/15224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are there for Pacific island small states?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 25.6875, "loss": 2.7690846920013428 }, { "id": "identity_150425", "image": "AI2D/images/2976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure that is not always present in the cell as indicated in the diagram?" }, { "from": "gpt", "value": "The answer is Flagellum" } ], "clip_score": 32.0, "loss": 2.4134907722473145 }, { "id": "identity_59773", "image": "DocVQA/images/xkjp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the film being shown at the Alumni Day Program?" }, { "from": "gpt", "value": "The answer is That Certain Spirit" } ], "clip_score": 26.515625, "loss": 3.44973087310791 }, { "id": "identity_87402", "image": "Geometry3K/images/1073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the figure were to be folded along the hypotenuse of the larger triangle, what would be the length of the resulting figure's base?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.515625, "loss": 2.2128560543060303 }, { "id": "identity_224153", "image": "AI2D/images/3478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the parts of the grasshopper's mouthparts that are involved in manipulating food and describe their functions" }, { "from": "gpt", "value": "The answer is Mandibles, maxillae, labrum, labium, hypopharynx" } ], "clip_score": 38.28125, "loss": 2.1443819999694824 }, { "id": "identity_315560", "image": "IconQA/images/72045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this dotted line a line of symmetry?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.46875, "loss": 3.931652545928955 }, { "id": "identity_263800", "image": "Geometry3K/images/1843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the perimeter of the triangle and round the answer to the nearest hundredth.\nChoices:\n(A) 27.31\n(B) 28.52\n(C) 60.34\n(D) 61.55" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.3125, "loss": 2.287092924118042 }, { "id": "identity_23440", "image": "A-OKVQA/images/506508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the tall thin things carry?\nChoices:\n(A) graphite\n(B) wind turbine\n(C) fruits\n(D) power lines" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.046875, "loss": 4.253300189971924 }, { "id": "identity_230102", "image": "TextVQA/images/1d30c9cccb5d0ed4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name on the transom of this ship, and what is the name of the pavilion mentioned on the ship?" }, { "from": "gpt", "value": "The answer is Intrepid, Space Shuttle Pavilion" } ], "clip_score": 36.53125, "loss": 1.7027164697647095 }, { "id": "identity_334812", "image": "PMC-VQA/images/PMC5894172_Fig2_299739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What diagnostic imaging technique was used to detect the irregular mass in this area?\nChoices:\n(A) Magnetic resonance imaging (MRI)\n(B) Computed tomography (CT) scan\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.21875, "loss": 4.314942359924316 }, { "id": "identity_155986", "image": "ScienceQA/images/572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is bordered by four other states to its north?" }, { "from": "gpt", "value": "The answer is Colorado" } ], "clip_score": 30.125, "loss": 3.1314449310302734 }, { "id": "identity_244880", "image": "PMC-VQA/images/PMC6617077_jcm-08-00771-f001_6055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the surface conditions of the PEEK samples shown in a-c and determine which sample has the smoothest surface texture" }, { "from": "gpt", "value": "The answer is Sample a" } ], "clip_score": 27.078125, "loss": 3.8837010860443115 }, { "id": "identity_172375", "image": "TabMWP/images/13483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of train tickets sold over the 5 days?" }, { "from": "gpt", "value": "The answer is 165" } ], "clip_score": 36.09375, "loss": 2.0557875633239746 }, { "id": "identity_222562", "image": "Super-CLEVR/images/superCLEVR_new_001588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the airplane located to the left of the purple matte object?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.875, "loss": 4.063756942749023 }, { "id": "identity_115902", "image": "VQA-AS/images/16599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the seesaw?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 25.890625, "loss": 3.8750739097595215 }, { "id": "identity_74066", "image": "ScienceQA/images/11461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the specimen in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.671875, "loss": 3.754690647125244 }, { "id": "identity_145466", "image": "MapQA/images/map_8734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Texas' value greater than 4.5M?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 18.96875, "loss": 2.9605162143707275 }, { "id": "identity_119800", "image": "TextVQA/images/c56b6eeef46eff4c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the background where the plaque is mounted?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.4375, "loss": 2.8745381832122803 }, { "id": "identity_22220", "image": "ScienceQA/images/15905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Kentucky\n(B) Florida\n(C) Connecticut\n(D) Rhode Island" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.0, "loss": 4.65010404586792 }, { "id": "identity_111915", "image": "TextVQA/images/5c30034c0876878e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name displayed on the back of the phone?" }, { "from": "gpt", "value": "The answer is HTC" } ], "clip_score": 32.3125, "loss": 2.282451868057251 }, { "id": "identity_274392", "image": "TQA/images/question_images/cellular_respiration_6622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is classified as a by-product in the process of photosynthesis? \n(A) carbon dioxide\n(B) glucose\n(C) oxygen\n(D) water" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.109375, "loss": 3.944167375564575 }, { "id": "identity_152478", "image": "ScienceQA/images/1202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is located northeast of the highlighted state?" }, { "from": "gpt", "value": "The answer is Lake Erie" } ], "clip_score": 23.40625, "loss": 2.305959939956665 }, { "id": "identity_323273", "image": "UniGeo/images/3496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线l过正方形的一个顶点,点A,C到直线l的距离分别为1和2,则正方形的边长是()\n\nChoices:\n(A) 2\n(B) √{5}\n(C) 3\n(D) √{6}" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.53125, "loss": 2.831277370452881 }, { "id": "identity_210287", "image": "VizWiz/images/VizWiz_train_00014144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What beverage is contained in the can?" }, { "from": "gpt", "value": "The answer is Coca-Cola" } ], "clip_score": 33.4375, "loss": 1.7570359706878662 }, { "id": "identity_285276", "image": "IconQA/images/74351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture corresponds to the seventh position in the sequence, given that the first picture is a bucket?\nChoices:\n(A) crab\n(B) bucket\n(C) boat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.046875, "loss": 3.5735385417938232 }, { "id": "identity_208921", "image": "A-OKVQA/images/225177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the school bus?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 21.375, "loss": 2.533219337463379 }, { "id": "identity_79374", "image": "Geometry3K/images/2094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle KLR in the parallelogram JKLM?" }, { "from": "gpt", "value": "The answer is 135°" } ], "clip_score": 36.09375, "loss": 2.023972511291504 }, { "id": "identity_194010", "image": "TextVQA/images/001bebecea382500.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the make of the classic car displayed in the image?" }, { "from": "gpt", "value": "The answer is Jaguar" } ], "clip_score": 28.9375, "loss": 2.443000078201294 }, { "id": "identity_230448", "image": "IconQA/images/6676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you cut the twig into 4 equal pieces, what is the length of each piece to the nearest inch?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 34.3125, "loss": 2.6587936878204346 }, { "id": "identity_224118", "image": "ScienceQA/images/10310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the primary producer, primary consumer, secondary consumer, and tertiary consumer in this food web, and explain the flow of energy through these trophic levels" }, { "from": "gpt", "value": "The answer is Golden algae, copepod, water flea, black crappie" } ], "clip_score": 36.09375, "loss": 2.1723685264587402 }, { "id": "identity_179197", "image": "CLEVR-Math/images/CLEVR_train_035031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the objects are cubes?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 25.015625, "loss": 1.7201519012451172 }, { "id": "identity_152763", "image": "ScienceQA/images/17212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is to the immediate west of the highlighted state?" }, { "from": "gpt", "value": "The answer is West Virginia" } ], "clip_score": 30.03125, "loss": 2.831052541732788 }, { "id": "identity_273032", "image": "ScienceQA/images/6229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the solutions has a greater concentration of purple particles?\nChoices:\n(A) Both have the same concentration\n(B) Solution A\n(C) Solution B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.5625, "loss": 3.3172786235809326 }, { "id": "identity_312630", "image": "DocVQA/images/mzvg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When was this document issued?" }, { "from": "gpt", "value": "The answer is SEPTEMBER 15, 1975" } ], "clip_score": 25.953125, "loss": 1.4834073781967163 }, { "id": "identity_289399", "image": "MapQA/images/map_3294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Arkansas the state with the highest value among those that border Louisiana?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.546875, "loss": 2.7462081909179688 }, { "id": "identity_293439", "image": "VQA2.0/images/11682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is depicted?" }, { "from": "gpt", "value": "The answer is giraffe" } ], "clip_score": 30.390625, "loss": 2.5538365840911865 }, { "id": "identity_128464", "image": "TextVQA/images/445568e99c2861d7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name signed at the bottom right of the graffiti?" }, { "from": "gpt", "value": "The answer is Sparks" } ], "clip_score": 17.1875, "loss": 3.802305221557617 }, { "id": "identity_75768", "image": "ScienceQA/images/3741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is the tree undergoing in the image?" }, { "from": "gpt", "value": "The answer is Tapping" } ], "clip_score": 17.5, "loss": 4.28993034362793 }, { "id": "identity_63183", "image": "TQA/images/abc_question_images/tides_10136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the waxing crescent phase of the moon?" }, { "from": "gpt", "value": "The answer is None depicted" } ], "clip_score": 27.953125, "loss": 4.5482378005981445 }, { "id": "identity_273833", "image": "TextVQA/images/8f31789c43ac62ca.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify that person?" }, { "from": "gpt", "value": "The answer is the dark knight" } ], "clip_score": 28.96875, "loss": 2.3300671577453613 }, { "id": "identity_226809", "image": "PMC-VQA/images/PMC8195437_ppat.1009632.g003_464513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what is the likely method used to identify the viral particles indicated by the white arrows, and what is the significance of the color coding in the image?" }, { "from": "gpt", "value": "The answer is Immunofluorescence; blue for nuclei, red for viral particles" } ], "clip_score": 29.015625, "loss": 2.4504730701446533 }, { "id": "identity_2021", "image": "Super-CLEVR/images/superCLEVR_new_000553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The metal thing is what color?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 15.765625, "loss": 3.973820924758911 }, { "id": "identity_183637", "image": "VQA2.0/images/340736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any fruits visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, there are fruits" } ], "clip_score": 13.859375, "loss": 2.7935166358947754 }, { "id": "identity_77984", "image": "AI2D/images/3403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the species depicted in the diagram?" }, { "from": "gpt", "value": "The answer is Culex restuans" } ], "clip_score": 23.9375, "loss": 1.739598035812378 }, { "id": "identity_106667", "image": "VQA-RAD/images/synpic22794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a presence of kidney stones in this image?" }, { "from": "gpt", "value": "The answer is No presence of kidney stones" } ], "clip_score": 26.34375, "loss": 3.5419087409973145 }, { "id": "identity_90517", "image": "UniGeo/images/160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠ABO = 50°, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 25°" } ], "clip_score": 37.09375, "loss": 1.743431568145752 }, { "id": "identity_182066", "image": "A-OKVQA/images/365947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any liquids visible in the transparent cups on the counter?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.2265625, "loss": 3.090559720993042 }, { "id": "identity_258759", "image": "VQA2.0/images/84819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different types of objects visible in the image, and how many distinct types can you identify?" }, { "from": "gpt", "value": "The answer is six distinct types" } ], "clip_score": 13.71875, "loss": 5.00430154800415 }, { "id": "identity_208838", "image": "PMC-VQA/images/PMC6855525_F1_68302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image taken in the axial, coronal, or sagittal plane?" }, { "from": "gpt", "value": "The answer is Coronal plane" } ], "clip_score": 28.9375, "loss": 2.5581061840057373 }, { "id": "identity_242580", "image": "VizWiz/images/VizWiz_train_00018770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the net weight of the can in grams?" }, { "from": "gpt", "value": "The answer is 439.42 grams" } ], "clip_score": 19.0625, "loss": 1.9823857545852661 }, { "id": "identity_38553", "image": "CLEVR-Math/images/CLEVR_train_042839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all blue cylinders. Subtract all green cubes. How many cylinders are left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.15625, "loss": 2.5652782917022705 }, { "id": "identity_240660", "image": "VQA2.0/images/553077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are in the sky and how many palm trees are visible in the image?" }, { "from": "gpt", "value": "The answer is 3 birds and 7 palm trees" } ], "clip_score": 29.28125, "loss": 1.389551043510437 }, { "id": "identity_145277", "image": "DocVQA/images/hmwn0226_37.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number of this document?" }, { "from": "gpt", "value": "The answer is 37" } ], "clip_score": 21.5, "loss": 2.703888177871704 }, { "id": "identity_185707", "image": "PMC-VQA/images/PMC3970346_fig1_277589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of hemorrhage associated with the glioma in the MRI image?" }, { "from": "gpt", "value": "The answer is No, there is no evidence of hemorrhage" } ], "clip_score": 34.40625, "loss": 1.4892863035202026 }, { "id": "identity_99707", "image": "IconQA/images/62327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove two squares, how many squares will be left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.078125, "loss": 2.3885421752929688 }, { "id": "identity_237907", "image": "AI2D/images/3275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of structures you would encounter starting from the nucleolus and moving outward to the cell membrane?" }, { "from": "gpt", "value": "The answer is Nucleolus, Nucleus, Cytoplasm, Cell membrane" } ], "clip_score": 31.03125, "loss": 1.0524909496307373 }, { "id": "identity_299214", "image": "A-OKVQA/images/197163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many zebras are obstructing the path?\nChoices:\n(A) two\n(B) three\n(C) four\n(D) one" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.59375, "loss": 3.6042656898498535 }, { "id": "identity_305306", "image": "A-OKVQA/images/165200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the middle advertisement on the wall about?\nChoices:\n(A) athletic help\n(B) banking\n(C) suicide hotline\n(D) cellular service" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.484375, "loss": 4.265179634094238 }, { "id": "identity_255938", "image": "TextVQA/images/cd83b6c8254e0f7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hours and minutes long is this movie?" }, { "from": "gpt", "value": "The answer is 1 hour and 50 minutes" } ], "clip_score": 17.421875, "loss": 1.329033613204956 }, { "id": "identity_286298", "image": "FigureQA/images/34662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dim Gray not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.0234375, "loss": 3.933617115020752 }, { "id": "identity_302063", "image": "TextVQA/images/c5719706b2513a4b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of this cereal?" }, { "from": "gpt", "value": "The answer is frosted flakes" } ], "clip_score": 30.40625, "loss": 2.846219778060913 }, { "id": "identity_192586", "image": "VQA-AS/images/19080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the person's outfit?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.015625, "loss": 3.550419569015503 }, { "id": "identity_224782", "image": "IconQA/images/88086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Carter has a morning routine that takes exactly 45 minutes from the time he gets out of bed to the time he leaves for work. If Carter gets out of bed at the time shown on the clock, what time does he leave for work?" }, { "from": "gpt", "value": "The answer is 8:15 A.M" } ], "clip_score": 27.4375, "loss": 1.8351436853408813 }, { "id": "identity_323211", "image": "TabMWP/images/27257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does the snowboarding event finish?\nChoices:\n(A) 3:15 P.M.\n(B) 9:40 P.M.\n(C) 9:25 P.M.\n(D) 10:55 P.M." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.125, "loss": 3.9857170581817627 }, { "id": "identity_138355", "image": "DVQA/images/bar_train_00149797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people who preferred 'issue' in the 'turn' category?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 25.625, "loss": 2.708118438720703 }, { "id": "identity_77778", "image": "TQA/images/question_images/solar_system_6298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which planet is known as the Red Planet?" }, { "from": "gpt", "value": "The answer is Mars" } ], "clip_score": 29.46875, "loss": 2.7053256034851074 }, { "id": "identity_59854", "image": "DVQA/images/bar_train_00144553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between 'talk' and 'trade' datasets for the 'animal' method?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 32.8125, "loss": 3.010950803756714 }, { "id": "identity_165979", "image": "IconQA/images/43298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many additional paper clips would be needed to measure a line twice as long as the one shown?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 29.5, "loss": 2.305467367172241 }, { "id": "identity_122259", "image": "VQA2.0/images/320245.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a person visible inside the establishment?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.3203125, "loss": 3.157363176345825 }, { "id": "identity_28583", "image": "CLEVR-Math/images/CLEVR_train_026492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract 0 blue spheres. How many objects are left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.453125, "loss": 2.2821807861328125 }, { "id": "identity_67484", "image": "ScienceQA/images/20037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely function of the large teeth in the front of the mouth?" }, { "from": "gpt", "value": "The answer is Grasping and holding prey" } ], "clip_score": 26.234375, "loss": 2.9204251766204834 }, { "id": "identity_316669", "image": "GeoQA+/images/4781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线AB垂直于CD于O,直线EF交AB于O,∠COF=70°,则∠AOE等于()\n选项:\n(A) 20°\n(B) 30°\n(C) 35°\n(D) 70°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.40625, "loss": 2.854663372039795 }, { "id": "identity_243073", "image": "ScienceQA/images/8817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total amount of thermal energy lost by both cakes if the specific heat capacity of the cake is 2.5 J/g°C and the mass of each cake is 500 grams" }, { "from": "gpt", "value": "The answer is 36250 J" } ], "clip_score": 29.34375, "loss": 2.22110652923584 }, { "id": "identity_4277", "image": "ScienceQA/images/289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Pennsylvania?\nChoices:\n(A) Harrisburg\n(B) Trenton\n(C) Pittsburgh\n(D) Philadelphia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.140625, "loss": 4.294569969177246 }, { "id": "identity_194839", "image": "PMC-VQA/images/PMC3439461_pone-0044086-g002_154302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the MRI section (b), which structure is indicated by the brown color?" }, { "from": "gpt", "value": "The answer is Optic Tract (OT)" } ], "clip_score": 23.046875, "loss": 2.657331705093384 }, { "id": "identity_219786", "image": "VQA2.0/images/523262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ski poles can be seen in the image?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 21.578125, "loss": 1.8968034982681274 }, { "id": "identity_98529", "image": "TabMWP/images/8819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in food preparation time between the day with the longest time and the day with the shortest time?" }, { "from": "gpt", "value": "The answer is 6 minutes" } ], "clip_score": 34.375, "loss": 2.250483989715576 }, { "id": "identity_292197", "image": "GeoQA+/images/4498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and C lie on the circumference of circle O. Lines OA, OC, and OD are drawn with OD perpendicular to AB at point D. If AO bisects angle CAB, and angle CAB is 50°, what is the measure of angle OCB? \n(A) 40°\n(B) 35°\n(C) 30°\n(D) 25°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.9375, "loss": 2.1882708072662354 }, { "id": "identity_335221", "image": "TabMWP/images/18491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Jasper buys 1 kilogram of this. How much does he spend?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 21.78125, "loss": 3.3424501419067383 }, { "id": "identity_319720", "image": "GeoQA+/images/2231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,是一个含30°角的三角板放在一个菱形纸片上,且斜边与菱形的一边平行,则这个角的度数是()\n\nChoices:\n(A) 65°\n(B) 60°\n(C) 58°\n(D) 55°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.625, "loss": 2.7615437507629395 }, { "id": "identity_203619", "image": "TextVQA/images/7f6c3823509de616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number of the restaurant?" }, { "from": "gpt", "value": "The answer is 840-710" } ], "clip_score": 20.078125, "loss": 2.644477128982544 }, { "id": "identity_267494", "image": "FigureQA/images/93346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Medium Seafoam and Cyan intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.0078125, "loss": 3.6237473487854004 }, { "id": "identity_270956", "image": "DocVQA/images/sllk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What animal is utilized to test for the deficiency of sorbitol and mannitol at low levels?" }, { "from": "gpt", "value": "The answer is rats" } ], "clip_score": 20.78125, "loss": 2.235480308532715 }, { "id": "identity_61843", "image": "FigureQA/images/63126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are represented by the color Hot Pink?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 30.796875, "loss": 2.2441465854644775 }, { "id": "identity_155837", "image": "ScienceQA/images/9598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant vegetation visible in the Tallgrass Prairie National Preserve?" }, { "from": "gpt", "value": "The answer is Tallgrass" } ], "clip_score": 30.359375, "loss": 2.283689260482788 }, { "id": "identity_152807", "image": "AI2D/images/618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days longer does the larva stage last compared to the pupa stage at maximum duration?" }, { "from": "gpt", "value": "The answer is 23 days" } ], "clip_score": 36.1875, "loss": 2.3818249702453613 }, { "id": "identity_6892", "image": "TextVQA/images/5bae72eb4abbfe29.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand is this phone?" }, { "from": "gpt", "value": "The answer is lg" } ], "clip_score": 23.65625, "loss": 3.073446035385132 }, { "id": "identity_211532", "image": "VQA2.0/images/373936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the player's wheelchair have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.375, "loss": 3.8235905170440674 }, { "id": "identity_280520", "image": "VQA-AS/images/14289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do flowers exist?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.453125, "loss": 3.5077877044677734 }, { "id": "identity_116468", "image": "PMC-VQA/images/PMC6102916_Fig1_357108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique is likely used in this histological image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) stain" } ], "clip_score": 32.96875, "loss": 1.0302029848098755 }, { "id": "identity_130496", "image": "VQA2.0/images/34469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the structure above the horses?" }, { "from": "gpt", "value": "The answer is A roof" } ], "clip_score": 25.625, "loss": 4.240520000457764 }, { "id": "identity_15319", "image": "TextVQA/images/25be68612a489971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is this a collection of old letters?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.890625, "loss": 3.2327702045440674 }, { "id": "identity_160083", "image": "GeoQA+/images/4522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of ∠1, ∠2, and ∠3?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 32.90625, "loss": 1.7982401847839355 }, { "id": "identity_219724", "image": "TextVQA/images/fe531f1bc6187758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the taxi in the bottom right corner?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 27.265625, "loss": 2.458918571472168 }, { "id": "identity_186421", "image": "A-OKVQA/images/343948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the foliage in the image?" }, { "from": "gpt", "value": "The answer is The foliage is dense and appears to be healthy" } ], "clip_score": 22.515625, "loss": 2.614286422729492 }, { "id": "identity_126843", "image": "A-OKVQA/images/352515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tennis court?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.203125, "loss": 3.125185251235962 }, { "id": "identity_188004", "image": "PMC-VQA/images/PMC6502287_FIG1_475685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the abnormalities indicated by the yellow arrows?" }, { "from": "gpt", "value": "The answer is Brain lesions or loss of brain matter" } ], "clip_score": 34.5625, "loss": 3.048349618911743 }, { "id": "identity_300359", "image": "Geometry3K/images/564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: J H, J P, and P H are midsegments of a triangle. Find x.\nChoices:\n(A) 37\n(B) 47\n(C) 57\n(D) 67" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.3125, "loss": 2.571805000305176 }, { "id": "identity_267191", "image": "MapQA/images/map_6545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Connecticut lower than that of Nevada?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.390625, "loss": 3.4187774658203125 }, { "id": "identity_336358", "image": "IconQA/images/100924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use dice to measure this. This is about (_) dice long." }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.8125, "loss": 2.404843807220459 }, { "id": "identity_74392", "image": "AI2D/images/2344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What stage comes immediately after the spore in the life cycle?" }, { "from": "gpt", "value": "The answer is Mature gametophyte" } ], "clip_score": 27.109375, "loss": 1.6017825603485107 }, { "id": "identity_60338", "image": "DVQA/images/bar_train_00082729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value for the 'rhythm' group?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 23.375, "loss": 2.4096851348876953 }, { "id": "identity_86395", "image": "Geometry3K/images/768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle opposite to the side with expression 7x - 4?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 33.1875, "loss": 1.9070672988891602 }, { "id": "identity_134643", "image": "ChartQA/images/7478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of refugee admissions during the first four weeks of Trump's presidency?" }, { "from": "gpt", "value": "The answer is 6,095" } ], "clip_score": 25.25, "loss": 2.5212104320526123 }, { "id": "identity_73660", "image": "AI2D/images/468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role do dead leaves play in this food web?" }, { "from": "gpt", "value": "The answer is Producer" } ], "clip_score": 34.96875, "loss": 4.588207721710205 }, { "id": "identity_107374", "image": "VQA2.0/images/475019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the rooftops of the buildings in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 13.9140625, "loss": 3.8764584064483643 }, { "id": "identity_311964", "image": "AI2D/images/3232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is A in the image?\nChoices:\n(A) cell membrane\n(B) cell wall\n(C) vesicle\n(D) chloroplast" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.296875, "loss": 3.835359573364258 }, { "id": "identity_5190", "image": "UniGeo/images/1016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,∠OAB=45°,则∠ACB的度数为()\nChoices:\n(A) 135°\n(B) 130°\n(C) 120°\n(D) 140°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.609375, "loss": 2.4586102962493896 }, { "id": "identity_320605", "image": "IconQA/images/61338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are in this?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.03125, "loss": 2.234023332595825 }, { "id": "identity_148700", "image": "DocVQA/images/xfhb0228_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the presentation scheduled at 2:30 p.m. on October 2, 1963?" }, { "from": "gpt", "value": "The answer is A Review of Poliomyelitis Immunization Procedures" } ], "clip_score": 25.203125, "loss": 1.8251985311508179 }, { "id": "identity_295903", "image": "FigureQA/images/91303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green the one with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.875, "loss": 4.373763084411621 }, { "id": "identity_59276", "image": "DocVQA/images/ffjv0228_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How soon should irrigation occur after applying preplant herbicide?" }, { "from": "gpt", "value": "The answer is Within 3 days" } ], "clip_score": 25.296875, "loss": 2.891017198562622 }, { "id": "identity_283250", "image": "VQA-AS/images/1840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the breed of the dog?" }, { "from": "gpt", "value": "The answer is chihuahua" } ], "clip_score": 22.734375, "loss": 2.6183977127075195 }, { "id": "identity_93867", "image": "TabMWP/images/15649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 1 tray is added, how many coconut bars will there be in total?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 33.875, "loss": 2.5322494506835938 }, { "id": "identity_273908", "image": "A-OKVQA/images/207548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a possible outcome of sitting on this bench? \n(A) experience seasickness\n(B) get sunburned\n(C) get lost\n(D) have sandy feet" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.234375, "loss": 4.663259029388428 }, { "id": "identity_52145", "image": "FigureQA/images/99327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value achieved by Crimson on the graph?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 23.71875, "loss": 2.7714459896087646 }, { "id": "identity_176449", "image": "IconQA/images/19384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two more donuts are added to the image, how many donuts will there be in total?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 30.90625, "loss": 1.9499760866165161 }, { "id": "identity_137932", "image": "ChartQA/images/4522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the majority of College degree holders support the Democratic Party?" }, { "from": "gpt", "value": "The answer is No, it's a tie at 49%" } ], "clip_score": 33.46875, "loss": 2.5493428707122803 }, { "id": "identity_178058", "image": "IconQA/images/102012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock gains 5 minutes every hour, what will be the actual time when the clock shows 4:00 PM?" }, { "from": "gpt", "value": "The answer is 4:05 PM" } ], "clip_score": 33.6875, "loss": 1.3628870248794556 }, { "id": "identity_146923", "image": "DocVQA/images/mlcj0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the President signing the memorandum?" }, { "from": "gpt", "value": "The answer is William J. Darby, M.D., Ph.D" } ], "clip_score": 26.109375, "loss": 2.2093758583068848 }, { "id": "identity_271999", "image": "TextVQA/images/05f850bedfc426be.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the book?" }, { "from": "gpt", "value": "The answer is mists of everness" } ], "clip_score": 32.40625, "loss": 2.23403263092041 }, { "id": "identity_282467", "image": "VQA2.0/images/232250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the chairs?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 18.78125, "loss": 4.211350917816162 }, { "id": "identity_276020", "image": "MapQA/images/map_6806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Alaska equivalent to that of New Hampshire?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.546875, "loss": 2.97064471244812 }, { "id": "identity_124489", "image": "VQA2.0/images/303140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the elephant's skin?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 27.40625, "loss": 4.274747371673584 }, { "id": "identity_53986", "image": "FigureQA/images/73370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value of Turquoise on the y-axis label?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 23.25, "loss": 3.031932830810547 }, { "id": "identity_264148", "image": "PlotQA/images/89353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there grids present in the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.0625, "loss": 3.2581772804260254 }, { "id": "identity_65805", "image": "ScienceQA/images/1698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What poles of the magnets are facing each other?" }, { "from": "gpt", "value": "The answer is South and North" } ], "clip_score": 30.0625, "loss": 2.664644956588745 }, { "id": "identity_20513", "image": "DocVQA/images/ygfd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is telephone number of the American Institute of Nutrition?" }, { "from": "gpt", "value": "The answer is (301) 530-7050" } ], "clip_score": 18.265625, "loss": 1.3763498067855835 }, { "id": "identity_78317", "image": "AI2D/images/3693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which gland is the largest among the three shown?" }, { "from": "gpt", "value": "The answer is Parotid Gland" } ], "clip_score": 25.5, "loss": 1.802664875984192 }, { "id": "identity_200584", "image": "PMC-VQA/images/PMC5086042_Fig8_89299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the lateral ventricles be identified in this image?" }, { "from": "gpt", "value": "The answer is Yes, the lateral ventricles are visible" } ], "clip_score": 32.5, "loss": 1.9066904783248901 }, { "id": "identity_64880", "image": "ScienceQA/images/13301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these objects typically used by a specific age group?" }, { "from": "gpt", "value": "The answer is Yes, children or students" } ], "clip_score": 17.921875, "loss": 3.9999372959136963 }, { "id": "identity_293594", "image": "GeoQA+/images/2532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, AD is parallel to BE and CF, and lines l1 and l2 intersect these three parallel lines at points A, B, C and points D, E, F respectively. If AB = 2, AC = 6, and DE = 1.5, what is the length of DF?\nChoices:\n(A) 7.5\n(B) 6\n(C) 4.5\n(D) 3" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.28125, "loss": 2.4715030193328857 }, { "id": "identity_66725", "image": "ScienceQA/images/2422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the body of water located to the south of the highlighted country?" }, { "from": "gpt", "value": "The answer is Caribbean Sea" } ], "clip_score": 32.15625, "loss": 1.983790397644043 }, { "id": "identity_63307", "image": "ScienceQA/images/20356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the magnets arranged in a way that they will attract or repel each other?" }, { "from": "gpt", "value": "The answer is Attract" } ], "clip_score": 21.0625, "loss": 2.476487398147583 }, { "id": "identity_101307", "image": "TabMWP/images/6082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many key chains were made in total by the volunteers?" }, { "from": "gpt", "value": "The answer is 205" } ], "clip_score": 31.1875, "loss": 2.2507517337799072 }, { "id": "identity_155163", "image": "AI2D/images/2951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which simple machine can be used to reduce the effort needed to move an object up a height?" }, { "from": "gpt", "value": "The answer is Inclined Plane" } ], "clip_score": 35.21875, "loss": 1.661426305770874 }, { "id": "identity_184069", "image": "PMC-VQA/images/PMC3748773_fig9_225834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 33.34375, "loss": 2.4980170726776123 }, { "id": "identity_64198", "image": "ScienceQA/images/11729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean borders the highlighted state to the east?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 23.9375, "loss": 2.4482240676879883 }, { "id": "identity_40294", "image": "MapQA/images/map_5242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range for the state of Texas?" }, { "from": "gpt", "value": "The answer is 45.4%-53.6%" } ], "clip_score": 30.78125, "loss": 1.8186618089675903 }, { "id": "identity_329954", "image": "IconQA/images/10415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) neither; white and green are equally likely\n(B) white\n(C) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.251405715942383 }, { "id": "identity_156685", "image": "ScienceQA/images/18502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the final temperature of the salmon on the left half of the grill compared to the salmon on the right half of the grill?" }, { "from": "gpt", "value": "The answer is 8°C" } ], "clip_score": 25.40625, "loss": 2.4268736839294434 }, { "id": "identity_147040", "image": "DocVQA/images/zqvw0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many questions will James Fiorica, MD address in his presentation?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.796875, "loss": 2.6496875286102295 }, { "id": "identity_328919", "image": "GeoQA+/images/4310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A、B、C是圆上的点,若∠AOB=60°,则∠ACB的度数为()\n选项:\n(A) 60°\n(B) 50°\n(C) 40°\n(D) 30°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.40625, "loss": 2.871157169342041 }, { "id": "identity_235373", "image": "DocVQA/images/snyc0227_221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of persons examined in Selangor, Johore, and Kedah combined?" }, { "from": "gpt", "value": "The answer is 555" } ], "clip_score": 19.234375, "loss": 2.4683823585510254 }, { "id": "identity_285463", "image": "GeoQA+/images/2852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given figure, a strip of paper with parallel edges is folded twice, creating creases AB and CD. If CD is parallel to BE and angle 2 is 66°, what is the degree measure of angle 1?\nChoices:\n(A) 48°\n(B) 57°\n(C) 60°\n(D) 66°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.5, "loss": 2.447108745574951 }, { "id": "identity_244602", "image": "PMC-VQA/images/PMC7939567_F0002_386115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnetic field strength of the MRI used to reveal the thalamus infarction, and how does it compare to the magnetic field strength of a 1T MRI in terms of signal-to-noise ratio (SNR)?" }, { "from": "gpt", "value": "The answer is 3T MRI has three times the SNR of 1T MRI" } ], "clip_score": 29.8125, "loss": 2.372101306915283 }, { "id": "identity_273379", "image": "TextVQA/images/725bb7268d230572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is displayed on the player's jersey?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 14.46875, "loss": 2.2870380878448486 }, { "id": "identity_281428", "image": "TextVQA/images/e7c20f9a702ce0da.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the word \"designer\" written on the picture on the right?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.0, "loss": 3.8515772819519043 }, { "id": "identity_226028", "image": "FigureQA/images/68272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Purple line intersect with any other lines in the graph?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.125, "loss": 1.833534598350525 }, { "id": "identity_262190", "image": "TabMWP/images/15056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Janet requires to purchase a banana split and a slice of cake?" }, { "from": "gpt", "value": "The answer is 7.68" } ], "clip_score": 32.09375, "loss": 2.1809170246124268 }, { "id": "identity_85229", "image": "UniGeo/images/1986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,PA,PB分别与⊙O相切于A、B两点.直线EF切⊙O于C点,分别交PA、PB于E、F,且PA=10.求点C到直线PA的距离是多少?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.421875, "loss": 2.155238628387451 }, { "id": "identity_186843", "image": "Super-CLEVR/images/superCLEVR_new_001181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Purple and green" } ], "clip_score": 23.53125, "loss": 3.306307792663574 }, { "id": "identity_94218", "image": "TabMWP/images/285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a recipe calls for 1/2 pound of romaine lettuce, how much will it cost?" }, { "from": "gpt", "value": "The answer is $1.50" } ], "clip_score": 29.59375, "loss": 1.5317628383636475 }, { "id": "identity_315614", "image": "ScienceQA/images/20355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is shown in the image?\nChoices:\n(A) South America\n(B) North America\n(C) Asia\n(D) Europe" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.453125, "loss": 4.025728702545166 }, { "id": "identity_138061", "image": "DocVQA/images/hmvj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the incorporation year of the company?" }, { "from": "gpt", "value": "The answer is 1970" } ], "clip_score": 15.578125, "loss": 2.114407539367676 }, { "id": "identity_90867", "image": "GeoQA+/images/226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If PA and PB are tangents to circle O at points A and B respectively, and angle P measures 50 degrees, what is the perimeter of triangle PAB?" }, { "from": "gpt", "value": "The answer is approximately 29.54 units" } ], "clip_score": 31.71875, "loss": 2.173076868057251 }, { "id": "identity_230090", "image": "DVQA/images/bar_train_00154141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithms on the 'meat' dataset?" }, { "from": "gpt", "value": "The answer is 4.8" } ], "clip_score": 36.65625, "loss": 2.383206844329834 }, { "id": "identity_227346", "image": "IconQA/images/96119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Jenny starts riding her bike at 6:00 P.M. and rides for 1 hour and 45 minutes, what time will it be when she finishes her ride?" }, { "from": "gpt", "value": "The answer is 7:45 P.M" } ], "clip_score": 26.796875, "loss": 1.1008925437927246 }, { "id": "identity_216457", "image": "VQA2.0/images/465005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man lying on the grass doing?" }, { "from": "gpt", "value": "The answer is Lying down or resting" } ], "clip_score": 25.765625, "loss": 3.1224300861358643 }, { "id": "identity_43642", "image": "DVQA/images/bar_train_00030644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of blue dots across all categories?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 22.828125, "loss": 2.22568941116333 }, { "id": "identity_214236", "image": "VQA2.0/images/134177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Cloudy or stormy" } ], "clip_score": 20.421875, "loss": 2.686802387237549 }, { "id": "identity_136172", "image": "ChartQA/images/multi_col_325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of respondents said there is more than enough infrastructure or number of hospitals?" }, { "from": "gpt", "value": "The answer is 66.1%" } ], "clip_score": 32.1875, "loss": 1.8753706216812134 }, { "id": "identity_155994", "image": "AI2D/images/2197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What happens to the adults of the organism on the right side of the image after winter?" }, { "from": "gpt", "value": "The answer is They do not survive" } ], "clip_score": 24.90625, "loss": 3.0831620693206787 }, { "id": "identity_99247", "image": "TabMWP/images/9957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest number of e-mails sent in a day?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.59375, "loss": 2.5280091762542725 }, { "id": "identity_191500", "image": "PMC-VQA/images/PMC7152803_f3_150610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a yellow arrow in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.40625, "loss": 3.204336166381836 }, { "id": "identity_212031", "image": "PMC-VQA/images/PMC8664363_FIG1_136868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a symmetry between the two hemispheres of the brain in this image?" }, { "from": "gpt", "value": "The answer is No, there is asymmetry due to a dark area in the left hemisphere" } ], "clip_score": 28.078125, "loss": 2.111874580383301 }, { "id": "identity_234260", "image": "DVQA/images/bar_train_00159135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different patterns are used in the bars, and which categories do they correspond to?" }, { "from": "gpt", "value": "The answer is three patterns: reform, drunk, answer" } ], "clip_score": 28.40625, "loss": 3.0169477462768555 }, { "id": "identity_34282", "image": "TextVQA/images/42292cf946584d67.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: he will re spawn in how much time?" }, { "from": "gpt", "value": "The answer is 16 seconds" } ], "clip_score": 21.90625, "loss": 1.8745111227035522 }, { "id": "identity_175568", "image": "IconQA/images/34343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two marbles are selected without looking, what is the probability that both are purple?" }, { "from": "gpt", "value": "The answer is (6/13) * (5/12)" } ], "clip_score": 31.890625, "loss": 1.650111436843872 }, { "id": "identity_6401", "image": "Geometry3K/images/375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the measure of \\angle K L M.\nChoices:\n(A) 32\n(B) 44\n(C) 78\n(D) 94" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.9375, "loss": 3.2183852195739746 }, { "id": "identity_112798", "image": "TextVQA/images/5f5aef36f114d33b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the motto written on the seal?" }, { "from": "gpt", "value": "The answer is E PLURIBUS UNUM" } ], "clip_score": 28.71875, "loss": 1.8184185028076172 }, { "id": "identity_268654", "image": "ScienceQA/images/5728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the anticipated ratio of offspring with Marfan syndrome to those without Marfan syndrome? Select the most probable ratio from the following choices.\nChoices:\n(A) 3:1\n(B) 2:2\n(C) 4:0\n(D) 0:4\n(E) 1:3" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.484375, "loss": 3.7783424854278564 }, { "id": "identity_237769", "image": "Geometry3K/images/300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of the tangent segment \\( ST \\) from point \\( T \\) to point \\( S \\) on the circle, given that \\( RT = 20 \\) and \\( RS = x \\), where \\( x \\) is the radius of the circle. Round to the nearest tenth" }, { "from": "gpt", "value": "The answer is 16.9" } ], "clip_score": 34.1875, "loss": 2.1727585792541504 }, { "id": "identity_128604", "image": "VizWiz/images/VizWiz_train_00017851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the leaves of the purple flower at the bottom?" }, { "from": "gpt", "value": "The answer is Smooth and waxy" } ], "clip_score": 23.90625, "loss": 3.3718292713165283 }, { "id": "identity_145352", "image": "ChartQA/images/05411753006467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the projected mismanaged plastic waste for the Dominican Republic more than 20 times that of Guadeloupe?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 34.8125, "loss": 3.1582961082458496 }, { "id": "identity_215541", "image": "VizWiz/images/VizWiz_train_00007867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the chair?" }, { "from": "gpt", "value": "The answer is Used with visible wear" } ], "clip_score": 28.109375, "loss": 3.6506946086883545 }, { "id": "identity_291152", "image": "TextVQA/images/a2abff68ff57ac45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a mention of a miner in the back?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.625, "loss": 3.5539631843566895 }, { "id": "identity_156083", "image": "ScienceQA/images/1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly west of West Virginia?" }, { "from": "gpt", "value": "The answer is Kentucky" } ], "clip_score": 25.140625, "loss": 3.161247730255127 }, { "id": "identity_34981", "image": "PMC-VQA/images/PMC7348129_Fig4_209432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What medical conditions have been ruled out in the patient? \nChoices:\n(A) Stroke and heart attack \n(B) Asthma and bronchitis \n(C) Pulmonary embolism and deep vein thrombosis \n(D) Malaria and yellow fever " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 3.3300018310546875 }, { "id": "identity_178694", "image": "TabMWP/images/30788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more expensive is the navy blue bath mat compared to the yellow bath towel?" }, { "from": "gpt", "value": "The answer is $9.00" } ], "clip_score": 38.90625, "loss": 1.5750161409378052 }, { "id": "identity_44833", "image": "MapQA/images/map_1579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color shade representing the range 2220132-2300234?" }, { "from": "gpt", "value": "The answer is Dark Brown" } ], "clip_score": 21.203125, "loss": 4.695478439331055 }, { "id": "identity_237201", "image": "PMC-VQA/images/PMC7724709_Fig1_316990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the specific steps involved in an anterior cervical corpectomy and fusion, and how do they address the patient's condition as seen in the image?" }, { "from": "gpt", "value": "The answer is Steps include preoperative planning, anesthesia, incision, exposure, corpectomy, fusion, fixation, and closure" } ], "clip_score": 27.1875, "loss": 2.074218511581421 }, { "id": "identity_29987", "image": "DocVQA/images/rsdw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the document?" }, { "from": "gpt", "value": "The answer is 1/12/04" } ], "clip_score": 15.9375, "loss": 2.191244125366211 }, { "id": "identity_160730", "image": "GeoQA+/images/2496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment PM if OP is 10 and angle P1 is 15 degrees?" }, { "from": "gpt", "value": "The answer is 5√3" } ], "clip_score": 29.453125, "loss": 2.2024776935577393 }, { "id": "identity_199295", "image": "PMC-VQA/images/PMC7763342_brainsci-10-00963-f002_328088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of an aneurysm in the indicated area?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.359375, "loss": 2.7274296283721924 }, { "id": "identity_67431", "image": "ScienceQA/images/6638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of locomotion is the cheetah using in the image?" }, { "from": "gpt", "value": "The answer is The cheetah is running" } ], "clip_score": 34.21875, "loss": 2.6394145488739014 }, { "id": "identity_199336", "image": "TextVQA/images/1a681a5267412884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many app icons are displayed at the bottom of the screen?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 18.109375, "loss": 2.3162577152252197 }, { "id": "identity_296984", "image": "A-OKVQA/images/238385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which zone is shown in the image provided? \nOptions:\n(A) residential\n(B) traffic\n(C) business\n(D) shopping" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.65625, "loss": 4.299054145812988 }, { "id": "identity_333818", "image": "IconQA/images/63586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Carla is outside in the snow one morning. Her watch shows a time. What time is it?\nChoices:\n(A) 11:30 P.M.\n(B) 11:30 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.96875, "loss": 3.5613346099853516 }, { "id": "identity_206793", "image": "A-OKVQA/images/287305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the destination displayed on the front of the blue bus?" }, { "from": "gpt", "value": "The answer is Escolta" } ], "clip_score": 28.5625, "loss": 3.0632309913635254 }, { "id": "identity_116441", "image": "Super-CLEVR/images/superCLEVR_new_000578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any objects that are the same color as the green school bus?" }, { "from": "gpt", "value": "The answer is Yes, the green motorbike" } ], "clip_score": 27.609375, "loss": 3.2138173580169678 }, { "id": "identity_228310", "image": "CLEVR-Math/images/CLEVR_train_047033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all big shiny cubes and all small shiny spheres. How many objects are left?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.921875, "loss": 2.1344082355499268 }, { "id": "identity_99357", "image": "IconQA/images/35536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are squares but not green?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.4375, "loss": 2.4761364459991455 }, { "id": "identity_277594", "image": "Geometry3K/images/1243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x that would make the quadrilateral a parallelogram.\nChoices:\n(A) 2\n(B) 4\n(C) 9\n(D) 11" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.5, "loss": 2.7109341621398926 }, { "id": "identity_173740", "image": "IconQA/images/96923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the circles the same size?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.46875, "loss": 3.1526570320129395 }, { "id": "identity_224234", "image": "DVQA/images/bar_train_00021000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'sphere' algorithm across all datasets?" }, { "from": "gpt", "value": "The answer is approximately 4.29" } ], "clip_score": 34.1875, "loss": 2.4511876106262207 }, { "id": "identity_76674", "image": "AI2D/images/4379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which drawing shows a foot adapted for swimming?" }, { "from": "gpt", "value": "The answer is Drawing (b)" } ], "clip_score": 27.171875, "loss": 2.2129673957824707 }, { "id": "identity_268240", "image": "VQA2.0/images/388149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the photo contain the American flag?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.40625, "loss": 4.000892639160156 }, { "id": "identity_241781", "image": "UniGeo/images/1842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, BC=\\frac{1}{2}AB, D为AC的中点, DC=3cm, 若E为AB的中点, 求DE的长是多少?" }, { "from": "gpt", "value": "The answer is \\sqrt{6.5} cm" } ], "clip_score": 26.46875, "loss": 2.6967203617095947 }, { "id": "identity_197622", "image": "A-OKVQA/images/491000.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are walking on the sidewalk?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 23.671875, "loss": 2.2755720615386963 }, { "id": "identity_76625", "image": "ScienceQA/images/17165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter on the map represents the ancient civilization that developed around the Tigris and Euphrates rivers?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.34375, "loss": 2.736389636993408 }, { "id": "identity_61021", "image": "FigureQA/images/85481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the highest value at the 50 mark on the x-axis?" }, { "from": "gpt", "value": "The answer is Medium Orchid" } ], "clip_score": 28.390625, "loss": 2.1026179790496826 }, { "id": "identity_68889", "image": "ScienceQA/images/1032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state is highlighted in dark green on the map?" }, { "from": "gpt", "value": "The answer is Arizona" } ], "clip_score": 36.1875, "loss": 2.725132465362549 }, { "id": "identity_285870", "image": "VQA2.0/images/330516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this located in America?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.375, "loss": 3.4092414379119873 }, { "id": "identity_270334", "image": "DocVQA/images/qgng0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the required number mentioned on the page provided?" }, { "from": "gpt", "value": "The answer is A 42" } ], "clip_score": 13.6953125, "loss": 3.53621506690979 }, { "id": "identity_175392", "image": "IconQA/images/21394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many black marbles are there in total?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.125, "loss": 1.9073899984359741 }, { "id": "identity_39524", "image": "FigureQA/images/13927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the Red and Pale Green sections together more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 39.4375, "loss": 3.117912769317627 }, { "id": "identity_166500", "image": "IconQA/images/9061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the circles in the flask the same size?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.140625, "loss": 3.2997145652770996 }, { "id": "identity_130587", "image": "A-OKVQA/images/295499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is visible on the jersey of the player attempting to block the shot?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 19.625, "loss": 2.1635360717773438 }, { "id": "identity_122836", "image": "PMC-VQA/images/PMC8137781_Fig9_446471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of radiograph view is this image showing?" }, { "from": "gpt", "value": "The answer is Lateral view" } ], "clip_score": 32.34375, "loss": 3.230849027633667 }, { "id": "identity_110757", "image": "A-OKVQA/images/56040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many drinking glasses can be seen on the table?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.953125, "loss": 4.48529577255249 }, { "id": "identity_172454", "image": "IconQA/images/45652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove one star, how many points will remain in total?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 27.078125, "loss": 2.1257474422454834 }, { "id": "identity_286605", "image": "PMC-VQA/images/PMC6594495_os12432-fig-0001_203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of fracture displayed in the radiograph:\nOptions:\n(A) spiral fracture\n(B) comminuted fracture\n(C) greenstick fracture\n(D) oblique fracture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.375, "loss": 4.762259483337402 }, { "id": "identity_186837", "image": "VizWiz/images/VizWiz_train_00007537.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the net weight of the soup can?" }, { "from": "gpt", "value": "The answer is 18.5 oz (1 lb 2.5 oz) 524g" } ], "clip_score": 32.1875, "loss": 1.7907967567443848 }, { "id": "identity_155470", "image": "ScienceQA/images/10936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean lies to the east of the highlighted country?" }, { "from": "gpt", "value": "The answer is Pacific Ocean" } ], "clip_score": 31.34375, "loss": 2.5745768547058105 }, { "id": "identity_275484", "image": "TextVQA/images/7cf080f4b0153323.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the book?" }, { "from": "gpt", "value": "The answer is around the world in 80 days" } ], "clip_score": 19.796875, "loss": 1.2329667806625366 }, { "id": "identity_332082", "image": "CLEVR-Math/images/CLEVR_train_012187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all cubes. How many items remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.546875, "loss": 1.9801794290542603 }, { "id": "identity_238637", "image": "ChartQA/images/8774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of adults surveyed if 18% of them strongly support the Black Lives Matter movement and this represents 180 individuals?" }, { "from": "gpt", "value": "The answer is 1000" } ], "clip_score": 33.46875, "loss": 1.5599687099456787 }, { "id": "identity_325871", "image": "VQA2.0/images/145891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this animal doing?" }, { "from": "gpt", "value": "The answer is laying on bed" } ], "clip_score": 27.84375, "loss": 2.4974732398986816 }, { "id": "identity_102766", "image": "TabMWP/images/11372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Audrey has $1.00. She buys five elastic bands. How much money will she have left?" }, { "from": "gpt", "value": "The answer is $0.60" } ], "clip_score": 22.9375, "loss": 1.9540209770202637 }, { "id": "identity_57284", "image": "DocVQA/images/ymbv0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the start and end times of the reception?" }, { "from": "gpt", "value": "The answer is 7:00 PM to 9:00 PM" } ], "clip_score": 18.1875, "loss": 1.1259222030639648 }, { "id": "identity_76588", "image": "AI2D/images/806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is a secondary consumer in the given food web?" }, { "from": "gpt", "value": "The answer is Weasel, bank vole, and shrew" } ], "clip_score": 27.75, "loss": 2.7387914657592773 }, { "id": "identity_35456", "image": "VQA2.0/images/88274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this in the USA?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 11.7578125, "loss": 3.162869691848755 }, { "id": "identity_325016", "image": "A-OKVQA/images/392659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the animal positioned while riding on the bike?\nChoices:\n(A) handlebar\n(B) lap\n(C) basket\n(D) seat" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0625, "loss": 4.201257228851318 }, { "id": "identity_321168", "image": "PMC-VQA/images/PMC6899010_DMM042200F1_80328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being imaged here?\nChoices:\n(A) Dopamine\n(B) Serotonin\n(C) Tyrosine hydroxylase\n(D) Glutamate" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.21875, "loss": 3.9615771770477295 }, { "id": "identity_62720", "image": "ScienceQA/images/15208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distinctive color marking on the bird's head?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 31.375, "loss": 3.713768720626831 }, { "id": "identity_154435", "image": "ScienceQA/images/13094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fingers is the individual using to draw the bowstring?" }, { "from": "gpt", "value": "The answer is Three fingers" } ], "clip_score": 19.125, "loss": 3.9592626094818115 }, { "id": "identity_185399", "image": "A-OKVQA/images/75213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the walls in this room?" }, { "from": "gpt", "value": "The answer is Light blue" } ], "clip_score": 24.65625, "loss": 3.9259796142578125 }, { "id": "identity_45349", "image": "FigureQA/images/52825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the majority in the pie chart?" }, { "from": "gpt", "value": "The answer is Crimson" } ], "clip_score": 34.75, "loss": 2.7649483680725098 }, { "id": "identity_101515", "image": "CLEVR-Math/images/CLEVR_train_025491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of spheres present in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.5, "loss": 2.106377124786377 }, { "id": "identity_324098", "image": "GeoQA+/images/1609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图∠A是⊙O的圆周角,∠A=50°,则∠OBC的度数是多少?\n\n选项:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.25, "loss": 2.8277578353881836 }, { "id": "identity_209076", "image": "VQA2.0/images/151709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of aircraft is shown in the image?" }, { "from": "gpt", "value": "The answer is Seaplane" } ], "clip_score": 29.671875, "loss": 2.663332223892212 }, { "id": "identity_327669", "image": "TabMWP/images/4369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Deion joined a book club to spend more quality time with his cousin. At the first meeting, club members recorded how many hours a week they typically read and their preference for e-readers or paperback books. What is the probability that a randomly chosen club member prefers e-readers and reads about 3 hours per week? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 3/14" } ], "clip_score": 20.078125, "loss": 1.8204345703125 }, { "id": "identity_142477", "image": "DocVQA/images/qtfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the meeting mentioned in the letter?" }, { "from": "gpt", "value": "The answer is Skytop meeting" } ], "clip_score": 19.703125, "loss": 3.460460901260376 }, { "id": "identity_13759", "image": "DVQA/images/bar_train_00193422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items sold less than 1 units in at least one store?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 31.09375, "loss": 3.6865437030792236 }, { "id": "identity_246442", "image": "DocVQA/images/rtjp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hours and minutes are allocated for the entire agenda, excluding lunch?" }, { "from": "gpt", "value": "The answer is 6 hours and 15 minutes" } ], "clip_score": 19.0, "loss": 1.4098998308181763 }, { "id": "identity_135319", "image": "DVQA/images/bar_train_00184771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between 'dome' and 'cost' for the group 'plate'?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.0, "loss": 2.5345804691314697 }, { "id": "identity_17530", "image": "TabMWP/images/17269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Some dealerships compared their vehicle sales. Which dealership sold the fewest vehicles?\nChoices:\n(A) Truck City\n(B) RV City\n(C) Cars of Pryor Falls\n(D) Affordable Cars" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.46875, "loss": 3.719041109085083 }, { "id": "identity_273955", "image": "GEOS/images/065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that the measure of ABD is 3*x-4, DBE is x-3, ABC is 31, and EBC is 2*x-10, what is the value of x?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.515625, "loss": 2.8472719192504883 }, { "id": "identity_179956", "image": "IconQA/images/87634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of yellow fish to red fish?" }, { "from": "gpt", "value": "The answer is 4:2" } ], "clip_score": 26.359375, "loss": 1.6114341020584106 }, { "id": "identity_149883", "image": "DocVQA/images/qspp0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What college is associated with this report?" }, { "from": "gpt", "value": "The answer is COLLEGE OF MEDICINE" } ], "clip_score": 21.421875, "loss": 1.6887761354446411 }, { "id": "identity_92403", "image": "IconQA/images/3404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each part of the shape costs $2, how much more does the white portion cost than the orange portion?" }, { "from": "gpt", "value": "The answer is $4 more" } ], "clip_score": 29.828125, "loss": 2.7539427280426025 }, { "id": "identity_75180", "image": "TQA/images/question_images/periodic_table_7374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the atomic number of the element represented by the symbol 'Au'?" }, { "from": "gpt", "value": "The answer is 79" } ], "clip_score": 26.90625, "loss": 1.5047141313552856 }, { "id": "identity_255206", "image": "DocVQA/images/zqdw0217_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of CE taken by all patients in Treatment Group A over a 28-day cycle?" }, { "from": "gpt", "value": "The answer is 17.5 mg" } ], "clip_score": 24.734375, "loss": 1.6957017183303833 }, { "id": "identity_108629", "image": "A-OKVQA/images/198545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of objects are hanging from the ceiling?" }, { "from": "gpt", "value": "The answer is Decorative umbrellas" } ], "clip_score": 31.578125, "loss": 2.109135866165161 }, { "id": "identity_132827", "image": "VQA2.0/images/539180.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand advertisement can be seen on the outfield wall?" }, { "from": "gpt", "value": "The answer is Powerade" } ], "clip_score": 26.734375, "loss": 2.981109857559204 }, { "id": "identity_158884", "image": "ScienceQA/images/13360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lone pairs of electrons are present on the oxygen atom in the compound shown?" }, { "from": "gpt", "value": "The answer is Two lone pairs" } ], "clip_score": 19.046875, "loss": 3.1649105548858643 }, { "id": "identity_331869", "image": "IconQA/images/49467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a paw. What is the third image?\nChoices:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.712176322937012 }, { "id": "identity_197209", "image": "A-OKVQA/images/335784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.0, "loss": 3.140510082244873 }, { "id": "identity_141909", "image": "DocVQA/images/tsgx0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on which the service was billed?" }, { "from": "gpt", "value": "The answer is December 12, 1997" } ], "clip_score": 27.796875, "loss": 1.7102630138397217 }, { "id": "identity_114307", "image": "Super-CLEVR/images/superCLEVR_new_001148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of flying objects to land vehicles in the image?" }, { "from": "gpt", "value": "The answer is 2:3" } ], "clip_score": 23.109375, "loss": 1.8491543531417847 }, { "id": "identity_111214", "image": "VQA-RAD/images/synpic29795.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of abnormal masses or growths?" }, { "from": "gpt", "value": "The answer is The image may show signs of a mass or growth" } ], "clip_score": 25.265625, "loss": 3.2632243633270264 }, { "id": "identity_82055", "image": "Geometry3K/images/1956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle EHG?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 32.09375, "loss": 1.8220584392547607 }, { "id": "identity_52533", "image": "FigureQA/images/68665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color line has the highest peak value on the graph?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 28.03125, "loss": 2.51324462890625 }, { "id": "identity_48193", "image": "FigureQA/images/97287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the distribution of Rebecca Purple data points more concentrated towards the lower or higher end of the y-axis?" }, { "from": "gpt", "value": "The answer is Lower or Higher" } ], "clip_score": 30.0625, "loss": 3.747635841369629 }, { "id": "identity_160167", "image": "GeoQA+/images/4861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if the length of segment BM is 4 units and the length of segment CN is 3 units, what is the length of segment FG?" }, { "from": "gpt", "value": "The answer is FG = 2MG (or 2NG)" } ], "clip_score": 30.8125, "loss": 2.591421604156494 }, { "id": "identity_189804", "image": "A-OKVQA/images/34624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the exercise ball chair?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.203125, "loss": 3.4036173820495605 }, { "id": "identity_290207", "image": "PMC-VQA/images/PMC4195433_fig7_327356.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After the embolization of the right IIA, which procedure was conducted next?\nChoices:\n(A) Balloon Angiography\n(B) Completion Angiography\n(C) CT Scan\n(D) MRI Scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.921875, "loss": 3.710566997528076 }, { "id": "identity_252659", "image": "DocVQA/images/nhmm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total grant amount for the entire three-year period, and what percentage of the total grant amount does the partial payment of $73,500.00 represent?" }, { "from": "gpt", "value": "The answer is Total grant amount is $365,000.00, and the percentage is 20.14%" } ], "clip_score": 19.015625, "loss": 1.704127550125122 }, { "id": "identity_72181", "image": "ScienceQA/images/1903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the object in the image appear to have a glossy or matte finish?" }, { "from": "gpt", "value": "The answer is Glossy" } ], "clip_score": 21.375, "loss": 2.5093023777008057 }, { "id": "identity_286363", "image": "TabMWP/images/12198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on Malia's record of daily emails received for a week, what was the difference in the number of emails between Wednesday and Thursday?" }, { "from": "gpt", "value": "The answer is -6" } ], "clip_score": 32.3125, "loss": 2.8128654956817627 }, { "id": "identity_111409", "image": "VQA2.0/images/390524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is predominantly taking place in the image?" }, { "from": "gpt", "value": "The answer is Flying kites" } ], "clip_score": 27.5625, "loss": 2.4987592697143555 }, { "id": "identity_333403", "image": "MapQA/images/map_5297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Georgia have the lowest value among the states neighboring North Carolina?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.8125, "loss": 2.6680798530578613 }, { "id": "identity_118433", "image": "TextVQA/images/f19424b9835199db.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What action is the Santa Claus character performing in the last panel?" }, { "from": "gpt", "value": "The answer is Falling down the chimney" } ], "clip_score": 28.640625, "loss": 2.7405238151550293 }, { "id": "identity_222039", "image": "A-OKVQA/images/494579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are visible in the image?" }, { "from": "gpt", "value": "The answer is There are nine horses" } ], "clip_score": 22.125, "loss": 3.9012856483459473 }, { "id": "identity_330303", "image": "ScienceQA/images/10202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do the cells of Victoria amazonica contain a nucleus?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.375, "loss": 4.87233829498291 }, { "id": "identity_321378", "image": "TextVQA/images/41df32e563b8dcaf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this festival?" }, { "from": "gpt", "value": "The answer is 86 fest" } ], "clip_score": 17.484375, "loss": 2.0998036861419678 }, { "id": "identity_24984", "image": "A-OKVQA/images/517410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many geese are standing on the side of the marina?\nChoices:\n(A) four\n(B) two\n(C) five\n(D) three" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.96875, "loss": 3.220905303955078 }, { "id": "identity_15565", "image": "GeoQA+/images/2777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD为⊙O的内接四边形.延长AB与DC相交于点G,AO⊥CD,垂足为E,连接BD,∠GBC=50°,则∠DBC的度数为()\nChoices:\n(A) 50°\n(B) 60°\n(C) 80°\n(D) 90°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.53125, "loss": 2.802968978881836 }, { "id": "identity_257314", "image": "DocVQA/images/fpbw0217_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days in total are covered by all the listed meetings in the document?" }, { "from": "gpt", "value": "The answer is 23 days" } ], "clip_score": 20.625, "loss": 2.003498077392578 }, { "id": "identity_165293", "image": "UniGeo/images/2847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BOC?" }, { "from": "gpt", "value": "The answer is 110 degrees" } ], "clip_score": 29.609375, "loss": 1.9089584350585938 }, { "id": "identity_191613", "image": "VQA2.0/images/396907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the banner closest to the lamp post?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.671875, "loss": 3.8545875549316406 }, { "id": "identity_183241", "image": "PMC-VQA/images/PMC8268606_Fig1_5131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical orientation is this brain image taken in?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 30.625, "loss": 3.1732852458953857 }, { "id": "identity_256474", "image": "TabMWP/images/30650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total surplus or shortage for all given prices combined" }, { "from": "gpt", "value": "The answer is 17,500" } ], "clip_score": 23.265625, "loss": 1.9079967737197876 }, { "id": "identity_240922", "image": "TQA/images/abc_question_images/parts_worm_17296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Trace the path of food from the mouth to the anus, listing all the organs it passes through in order, including the organ directly behind the crop" }, { "from": "gpt", "value": "The answer is Mouth, pharynx, esophagus, crop, gizzard, intestine, anus" } ], "clip_score": 31.140625, "loss": 1.0304102897644043 }, { "id": "identity_275559", "image": "TabMWP/images/10059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the students in Ms. Ellis's science class, which animal is the most popular among girls based on the votes after their trip to the zoo?\nChoices:\n(A) giraffes\n(B) penguins\n(C) hippopotamuses" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.53125, "loss": 3.675579786300659 }, { "id": "identity_150365", "image": "ScienceQA/images/16628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is common among the three objects?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.203125, "loss": 4.011872291564941 }, { "id": "identity_284856", "image": "TabMWP/images/1420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In 1990, what was the difference in population density, in terms of people per square mile, between North Dakota and Montana as observed by Sue in the almanac at the library?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.21875, "loss": 2.499032974243164 }, { "id": "identity_290257", "image": "MapQA/images/map_4776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the worth or significance of South Carolina?" }, { "from": "gpt", "value": "The answer is 637326-638150" } ], "clip_score": 20.953125, "loss": 2.371734142303467 }, { "id": "identity_296418", "image": "GeoQA+/images/3202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and C are all on circle O. If angle A is 64°, what is the degree measure of angle OCB?\nChoices:\n(A) 24°\n(B) 26°\n(C) 28°\n(D) 30°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.84375, "loss": 2.3601577281951904 }, { "id": "identity_298263", "image": "IconQA/images/35729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of hearts?\nChoices:\n(A) 4\n(B) 3\n(C) 2\n(D) 5\n(E) 1" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.34375, "loss": 3.1941170692443848 }, { "id": "identity_18990", "image": "DVQA/images/bar_train_00138240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units of the item pause were sold across all the stores?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 32.84375, "loss": 2.2112185955047607 }, { "id": "identity_129031", "image": "VQA-RAD/images/synpic49914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the diaphragms at the same level?" }, { "from": "gpt", "value": "The answer is No, the right diaphragm is higher" } ], "clip_score": 25.484375, "loss": 2.0125060081481934 }, { "id": "identity_140022", "image": "FigureQA/images/17690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the bar labeled \"Gold\"?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 22.5625, "loss": 2.483717679977417 }, { "id": "identity_185298", "image": "TextVQA/images/7a7ff8f10023d171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What day of the week is the market, according to the sign in the background?" }, { "from": "gpt", "value": "The answer is Friday" } ], "clip_score": 14.2265625, "loss": 3.569491147994995 }, { "id": "identity_283422", "image": "ScienceQA/images/13284.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Iowa?\n(A) Des Moines\n(B) Madison\n(C) Bismarck\n(D) Saint Paul" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.078125, "loss": 4.119034767150879 }, { "id": "identity_237041", "image": "ChartQA/images/6062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the sum of the three smallest bars to the sum of the three largest bars?" }, { "from": "gpt", "value": "The answer is approximately 0.097" } ], "clip_score": 14.984375, "loss": 2.2378313541412354 }, { "id": "identity_279555", "image": "UniGeo/images/1393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, PA and PB are tangents to circle O at points A and B. If angle C is 65°, what is the degree measure of angle P?\nChoices:\n(A) 65°\n(B) 130°\n(C) 50°\n(D) 100°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.9375, "loss": 2.8507771492004395 }, { "id": "identity_319956", "image": "ScienceQA/images/8177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of this colony?\nChoices:\n(A) Wisconsin\n(B) South Carolina\n(C) Maryland\n(D) North Carolina" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.96875, "loss": 3.917201280593872 }, { "id": "identity_185775", "image": "TextVQA/images/a42b634f11fa317c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the hunchback using as a pillow when he is sleeping according to the text?" }, { "from": "gpt", "value": "The answer is His hunchback" } ], "clip_score": 20.625, "loss": 3.8373641967773438 }, { "id": "identity_210383", "image": "A-OKVQA/images/326217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the shirt of the person standing on the dock?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 12.828125, "loss": 3.7964253425598145 }, { "id": "identity_20277", "image": "TabMWP/images/18787.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: We've Got Rhythm dance studio teaches classes on many styles of dance. This frequency chart shows how many dancers are in each class. In all, how many dancers are in the two biggest classes?" }, { "from": "gpt", "value": "The answer is 115" } ], "clip_score": 37.75, "loss": 2.316653251647949 }, { "id": "identity_330721", "image": "ScienceQA/images/16909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these three items share?\nChoices:\n(A) blue\n(B) rough\n(C) slippery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.4921875, "loss": 3.932374954223633 }, { "id": "identity_294593", "image": "A-OKVQA/images/138956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what reason are the individuals moving through the outdoor space?\nChoices:\n(A) to race\n(B) to compete\n(C) to escape\n(D) to shop" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.609375, "loss": 3.960010051727295 }, { "id": "identity_304569", "image": "IconQA/images/49971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner less likely to land on?\nChoices:\n(A) white\n(B) pink" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.890625, "loss": 3.94115948677063 }, { "id": "identity_233264", "image": "Super-CLEVR/images/superCLEVR_new_000336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of small bikes in the image, regardless of their type or color?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.96875, "loss": 2.460724115371704 }, { "id": "identity_78823", "image": "GeoQA+/images/2631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle EFG?" }, { "from": "gpt", "value": "The answer is 56 degrees" } ], "clip_score": 33.78125, "loss": 2.4768214225769043 }, { "id": "identity_23036", "image": "GeoQA+/images/5038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 在△ABC中,∠BAC=115°,DE、FG分别为AB、AC的垂直平分线,则∠EAG的度数为()\nChoices:\n(A) 50°\n(B) 40°\n(C) 30°\n(D) 25°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.0625, "loss": 2.5438904762268066 }, { "id": "identity_57559", "image": "FigureQA/images/71459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color line has the lowest starting point at x-axis value 0?" }, { "from": "gpt", "value": "The answer is Dark Red" } ], "clip_score": 25.546875, "loss": 2.656724214553833 }, { "id": "identity_43572", "image": "FigureQA/images/66738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value of Dark Green on the y-axis?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 26.234375, "loss": 2.617286443710327 }, { "id": "identity_12976", "image": "ChartQA/images/14907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Support more then Oppose?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.421875, "loss": 3.482314348220825 }, { "id": "identity_248379", "image": "IconQA/images/60367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each dog needs 2 tennis balls, how many additional tennis balls are needed?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.15625, "loss": 2.3575637340545654 }, { "id": "identity_31231", "image": "A-OKVQA/images/11426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who owns Brough Superior motorcycles?\nChoices:\n(A) mark upham\n(B) george brough\n(C) lawrence\n(D) nicholace" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.078125, "loss": 3.633690595626831 }, { "id": "identity_236077", "image": "AI2D/images/4232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the rock types that are in direct contact with the granite intrusions and describe their relative positions" }, { "from": "gpt", "value": "The answer is Sedimentary Rocks, Ultramafic Volcanics, Mafic Volcanics, Granite Gneiss" } ], "clip_score": 38.46875, "loss": 1.1755067110061646 }, { "id": "identity_309662", "image": "VQA2.0/images/461705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the animal wearing over its eyes?" }, { "from": "gpt", "value": "The answer is sunglasses" } ], "clip_score": 26.484375, "loss": 2.48329758644104 }, { "id": "identity_111799", "image": "A-OKVQA/images/297876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tablet cover?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 17.390625, "loss": 4.804171562194824 }, { "id": "identity_156976", "image": "ScienceQA/images/8186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color theme used in the background of the advertisement?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.984375, "loss": 3.6652073860168457 }, { "id": "identity_222339", "image": "A-OKVQA/images/402967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles can be seen parked in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 20.296875, "loss": 4.1733808517456055 }, { "id": "identity_289913", "image": "A-OKVQA/images/329258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term is the individual in the air most acquainted with? \n(A) enzuiguiri\n(B) kickflip\n(C) quark\n(D) ad hoc" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.5390625, "loss": 4.4024338722229 }, { "id": "identity_228409", "image": "Geometry3K/images/158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine the measure of \\( \\angle W \\)" }, { "from": "gpt", "value": "The answer is 108" } ], "clip_score": 29.28125, "loss": 3.559359073638916 }, { "id": "identity_74874", "image": "AI2D/images/1853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organisms are herbivores in this ecosystem?" }, { "from": "gpt", "value": "The answer is Deer, chipmunks, and squirrel" } ], "clip_score": 30.8125, "loss": 2.4362618923187256 }, { "id": "identity_82475", "image": "UniGeo/images/544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠MBF = 20°, what is the measure of ∠MBC?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 33.1875, "loss": 1.6355576515197754 }, { "id": "identity_292396", "image": "VQA2.0/images/399232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the text written in the English language?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.9921875, "loss": 3.86042857170105 }, { "id": "identity_338128", "image": "CLEVR-Math/images/CLEVR_train_069082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all brown cylinders. Remove all purple blocks. How many cylinders remain?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 22.296875, "loss": 2.474503517150879 }, { "id": "identity_298462", "image": "TabMWP/images/26970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Jacob recorded the number of minutes he spent practicing soccer over the past 4 days. On which day did Jacob practice for the shortest amount of time? \nChoices:\n(A) Sunday\n(B) Monday\n(C) Tuesday\n(D) Wednesday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.59375, "loss": 3.4982407093048096 }, { "id": "identity_87295", "image": "GeoQA+/images/3907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOE?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 28.609375, "loss": 1.7657127380371094 }, { "id": "identity_270796", "image": "AI2D/images/691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which celestial body is represented by the letter A?\nChoices:\n(A) Earth\n(B) Moon\n(C) Mars\n(D) Sun" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.109375, "loss": 3.746075391769409 }, { "id": "identity_62802", "image": "AI2D/images/3933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What stage comes after the pupa in the life cycle of an insect?" }, { "from": "gpt", "value": "The answer is Adult" } ], "clip_score": 30.265625, "loss": 2.2696259021759033 }, { "id": "identity_67091", "image": "TQA/images/question_images/cycle_rock_6739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the starting point of the rock cycle as depicted in this diagram?" }, { "from": "gpt", "value": "The answer is Magma" } ], "clip_score": 35.5, "loss": 2.599691152572632 }, { "id": "identity_122382", "image": "VQA2.0/images/30619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the snowboarder doing in the air?" }, { "from": "gpt", "value": "The answer is The snowboarder is performing a jump or aerial trick" } ], "clip_score": 32.15625, "loss": 2.0457582473754883 }, { "id": "identity_141603", "image": "FigureQA/images/13968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the color Cadet Blue?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 22.046875, "loss": 2.2487282752990723 }, { "id": "identity_32888", "image": "IconQA/images/88256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.34375, "loss": 3.839946746826172 }, { "id": "identity_117234", "image": "Super-CLEVR/images/superCLEVR_new_001568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more red objects or green objects in the image?" }, { "from": "gpt", "value": "The answer is More red objects" } ], "clip_score": 18.703125, "loss": 2.7147107124328613 }, { "id": "identity_167382", "image": "TabMWP/images/6854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was Sandra's available funds balance at the beginning of December?" }, { "from": "gpt", "value": "The answer is $323.25" } ], "clip_score": 29.265625, "loss": 2.0016355514526367 }, { "id": "identity_316375", "image": "GeoQA+/images/4212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线AB、CD相交于点O,若OE垂直于AB,∠DOE=58°,则∠AOC等于()\nChoices:\n(A) 32°\n(B) 42°\n(C) 48°\n(D) 58°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.921875, "loss": 2.568718671798706 }, { "id": "identity_286755", "image": "DocVQA/images/mhkm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who holds the position of superintendent of corporations?" }, { "from": "gpt", "value": "The answer is David H. Cole" } ], "clip_score": 23.125, "loss": 2.5711145401000977 }, { "id": "identity_63267", "image": "ScienceQA/images/2915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many planets in the solar system have a primary composition of gas or ice?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.28125, "loss": 2.3180582523345947 }, { "id": "identity_290599", "image": "PMC-VQA/images/PMC7298109_F1_193581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the head CT scan? \nChoices:\n(A) Abnormality in the gray matter \n(B) Abnormality in the white matter \n(C) Abnormality in the cerebrospinal fluid \n(D) Abnormality in the ventricles" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.34375, "loss": 4.097331523895264 }, { "id": "identity_259063", "image": "A-OKVQA/images/28589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary components and their functions in the item with the lid, and how do they work together to achieve its purpose?" }, { "from": "gpt", "value": "The answer is Tank, bowl, flush mechanism, fill valve, overflow tube, flapper" } ], "clip_score": 18.046875, "loss": 2.4106476306915283 }, { "id": "identity_127524", "image": "PMC-VQA/images/PMC6306407_F7_415054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of midline shift in this MRI image?" }, { "from": "gpt", "value": "The answer is No indication of midline shift" } ], "clip_score": 32.59375, "loss": 2.445202112197876 }, { "id": "identity_118913", "image": "TextVQA/images/c3bbbfde4e516c27.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What political party is advertised on the billboard?" }, { "from": "gpt", "value": "The answer is UKIP" } ], "clip_score": 32.125, "loss": 2.600165367126465 }, { "id": "identity_172931", "image": "IconQA/images/12927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangle vertices present in the image?" }, { "from": "gpt", "value": "The answer is 27" } ], "clip_score": 28.671875, "loss": 2.410048246383667 }, { "id": "identity_30833", "image": "TabMWP/images/2722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Look at Janessa's pay stub. Janessa lives in a state without state income tax. How much payroll tax did Janessa pay in total?" }, { "from": "gpt", "value": "The answer is 1,087.52" } ], "clip_score": 32.4375, "loss": 2.0320332050323486 }, { "id": "identity_99990", "image": "TabMWP/images/9995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of times played?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.625, "loss": 2.726795196533203 }, { "id": "identity_94511", "image": "IconQA/images/101596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 4 circles to the bottom row, how many circles will be in the bottom row?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 35.375, "loss": 2.0193426609039307 }, { "id": "identity_86824", "image": "GeoQA+/images/4181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, AB=AC, and the angle bisector of ∠BAC intersects the perpendicular bisector of AB at point O. If ∠BAC = 58°, what is the measure of ∠BOC?" }, { "from": "gpt", "value": "The answer is 58°" } ], "clip_score": 32.84375, "loss": 1.6207212209701538 }, { "id": "identity_82217", "image": "UniGeo/images/2529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment BM?" }, { "from": "gpt", "value": "The answer is √17" } ], "clip_score": 26.828125, "loss": 2.808290958404541 }, { "id": "identity_108487", "image": "VQA2.0/images/335405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the terrain like on either side of the road?" }, { "from": "gpt", "value": "The answer is Grassy and hilly terrain" } ], "clip_score": 26.96875, "loss": 2.629316568374634 }, { "id": "identity_93777", "image": "IconQA/images/71391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any marbles that are not orange?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.90625, "loss": 2.9860637187957764 }, { "id": "identity_323512", "image": "IconQA/images/48881.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are there?\nChoices:\n(A) 50\n(B) 60\n(C) 49" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 3.177804470062256 }, { "id": "identity_112812", "image": "PMC-VQA/images/PMC7770766_Fig2_331531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale of magnification used in this image?" }, { "from": "gpt", "value": "The answer is 200 nanometer scale" } ], "clip_score": 29.359375, "loss": 2.044811964035034 }, { "id": "identity_109634", "image": "PMC-VQA/images/PMC8126899_os13015-fig-0005_443332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the structure in the image?" }, { "from": "gpt", "value": "The answer is Anteroposterior view" } ], "clip_score": 22.515625, "loss": 3.089763879776001 }, { "id": "identity_167189", "image": "TabMWP/images/27204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of plants in Mason's garden?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 30.375, "loss": 2.2556710243225098 }, { "id": "identity_256866", "image": "GeoQA+/images/1076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC的顶点A、B、C均在⊙O上,∠OAC=40°,∠OBC=15°, 求∠ACB的度数。" }, { "from": "gpt", "value": "The answer is 27.5°" } ], "clip_score": 32.40625, "loss": 1.8298931121826172 }, { "id": "identity_110273", "image": "PMC-VQA/images/PMC8365398_ccr34365-fig-0003_34973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bone shown in image B fully intact or fractured?" }, { "from": "gpt", "value": "The answer is The bone is intact, possibly post-fracture stabilization" } ], "clip_score": 32.59375, "loss": 3.1793410778045654 }, { "id": "identity_88007", "image": "Geometry3K/images/469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of angles RCL and MCL?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 32.90625, "loss": 1.5900774002075195 }, { "id": "identity_93198", "image": "CLEVR-Math/images/CLEVR_train_007780.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are matte?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.953125, "loss": 2.8328521251678467 }, { "id": "identity_19574", "image": "DocVQA/images/zrlp0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of Holidays?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 16.78125, "loss": 2.343057632446289 }, { "id": "identity_267743", "image": "GeoQA+/images/273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If a regular pentagon and a regular hexagon with equal side lengths are joined together as shown in the diagram, what is the degree measure of angle ABO?\nChoices:\n(A) 24°\n(B) 48°\n(C) 60°\n(D) 72°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.1875, "loss": 2.6654369831085205 }, { "id": "identity_124958", "image": "TextVQA/images/2a68c3c38defd6dc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the books on the bottom shelf?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.84375, "loss": 3.424241542816162 }, { "id": "identity_61123", "image": "ChartQA/images/74100259001967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in per capita energy consumption from nuclear between India and Luxembourg in 2012?" }, { "from": "gpt", "value": "The answer is 40 kWh" } ], "clip_score": 31.265625, "loss": 2.192835807800293 }, { "id": "identity_272057", "image": "DocVQA/images/hqcb0079_30.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of substrate is specified in the document?" }, { "from": "gpt", "value": "The answer is 98633" } ], "clip_score": 25.484375, "loss": 3.2743923664093018 }, { "id": "identity_300248", "image": "AI2D/images/203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is an example of something that produces?\nChoices:\n(A) water lily\n(B) small fish\n(C) insect\n(D) large fish" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.875, "loss": 4.221869945526123 }, { "id": "identity_206902", "image": "PMC-VQA/images/PMC3633951_pone-0061900-g003_200304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What stage of cell development is labeled with the letter G?" }, { "from": "gpt", "value": "The answer is Morula stage" } ], "clip_score": 32.875, "loss": 3.08347487449646 }, { "id": "identity_186812", "image": "VQA2.0/images/403307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person under the multicolored umbrella doing?" }, { "from": "gpt", "value": "The answer is Lying down and reading/using a device" } ], "clip_score": 23.8125, "loss": 3.4016294479370117 }, { "id": "identity_271978", "image": "IconQA/images/70072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of stars?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 30.3125, "loss": 1.9157676696777344 }, { "id": "identity_146089", "image": "DocVQA/images/hmvj0223_27.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is CAFL LTD Standalone Balance Sheet" } ], "clip_score": 28.046875, "loss": 3.8297228813171387 }, { "id": "identity_330574", "image": "PMC-VQA/images/PMC3407708_F5_147299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kinds of images were taken of the cervical spine?\nChoices:\n(A) Sagittal and Coronal\n(B) Axial and Oblique\n(C) Transverse and MIP\n(D) Lateral and Computed Tomography" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.84375, "loss": 4.364710807800293 }, { "id": "identity_313933", "image": "ScienceQA/images/12305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms is a primary consumer in this food web?\nChoices:\n(A) barren-ground caribou\n(B) parasitic jaeger\n(C) rough-legged hawk" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.5, "loss": 3.960052013397217 }, { "id": "identity_37592", "image": "AI2D/images/2727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: D is represented as what moon phase?\nChoices:\n(A) Waxing Gibbous\n(B) Waning Gibbous\n(C) Full\n(D) New" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.609375, "loss": 4.201836109161377 }, { "id": "identity_172204", "image": "IconQA/images/101795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are there in total?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 17.0625, "loss": 1.631479263305664 }, { "id": "identity_140887", "image": "DocVQA/images/prfg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the amount spent on gratuities?" }, { "from": "gpt", "value": "The answer is $5.25" } ], "clip_score": 23.640625, "loss": 2.038306951522827 }, { "id": "identity_303594", "image": "FigureQA/images/41913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Crimson?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.03125, "loss": 3.8344643115997314 }, { "id": "identity_68961", "image": "TQA/images/question_images/hydrocarbons_8094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the type of hydrocarbon represented by pentane" }, { "from": "gpt", "value": "The answer is Alkane" } ], "clip_score": 33.375, "loss": 2.1912472248077393 }, { "id": "identity_217602", "image": "A-OKVQA/images/29327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many giraffes are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.046875, "loss": 4.127584934234619 }, { "id": "identity_44497", "image": "DVQA/images/bar_train_00040575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy?" }, { "from": "gpt", "value": "The answer is hotel" } ], "clip_score": 34.90625, "loss": 3.39375376701355 }, { "id": "identity_186706", "image": "A-OKVQA/images/185302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats have visible registration numbers on their hulls?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.375, "loss": 4.7498979568481445 }, { "id": "identity_160208", "image": "GeoQA+/images/2025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and D is a point on the circle, with a tangent line from D intersecting the extended line of AB at point C, and angle C is 30 degrees, what is the measure of angle AOD?" }, { "from": "gpt", "value": "The answer is Angle AOD is 180 degrees" } ], "clip_score": 36.21875, "loss": 1.9926501512527466 }, { "id": "identity_205654", "image": "PMC-VQA/images/PMC9175008_F4_304069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is shown in the center of the image?" }, { "from": "gpt", "value": "The answer is Heart or aorta" } ], "clip_score": 22.375, "loss": 3.3833301067352295 }, { "id": "identity_25338", "image": "VizWiz/images/VizWiz_train_00014339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Hello and what kind of flavor of dinner is this one? Thanks a lot for your help." }, { "from": "gpt", "value": "The answer is turkey" } ], "clip_score": 23.203125, "loss": 3.286358594894409 }, { "id": "identity_207888", "image": "TextVQA/images/103f790f250cd011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What design concept is mentioned on the image?" }, { "from": "gpt", "value": "The answer is zumtobel design" } ], "clip_score": 26.703125, "loss": 3.253729820251465 }, { "id": "identity_206303", "image": "A-OKVQA/images/464237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are shaped like rings?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.5, "loss": 2.6003057956695557 }, { "id": "identity_257628", "image": "GeoQA+/images/4006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC≌△ADC,∠ABC=118°,∠DAC=40°,求∠BAC的度数和∠BCD的度数之和。" }, { "from": "gpt", "value": "The answer is 71°" } ], "clip_score": 26.34375, "loss": 2.5206706523895264 }, { "id": "identity_18802", "image": "ScienceQA/images/16869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the main persuasive appeal used in this ad?\nChoices:\n(A) pathos (emotion)\n(B) logos (reason)\n(C) ethos (character)" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 7.43359375, "loss": 4.723050117492676 }, { "id": "identity_147645", "image": "DVQA/images/bar_train_00041953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest percentage of people preferring the \"evil\" category?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 23.65625, "loss": 2.6329407691955566 }, { "id": "identity_170870", "image": "TabMWP/images/9096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the goal was to bake at least 200 cookies each, who did not meet the goal?" }, { "from": "gpt", "value": "The answer is Oscar" } ], "clip_score": 33.65625, "loss": 3.238353729248047 }, { "id": "identity_220387", "image": "TextVQA/images/5515f3d3b673ed49.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the hair tie?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 16.984375, "loss": 3.935972213745117 }, { "id": "identity_124802", "image": "TextVQA/images/43954e59fbc7b85a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What natural event is this alert system designed for?" }, { "from": "gpt", "value": "The answer is Volcanic activity" } ], "clip_score": 22.265625, "loss": 2.762455463409424 }, { "id": "identity_98225", "image": "IconQA/images/93562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two more circles are added, what will be the new ratio of triangles to circles?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 30.375, "loss": 2.169166326522827 }, { "id": "identity_322322", "image": "Super-CLEVR/images/superCLEVR_new_000307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any metallic vehicles of the same color as the tiny metal motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.265625, "loss": 5.4316182136535645 }, { "id": "identity_99397", "image": "CLEVR-Math/images/CLEVR_train_058231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many spherical objects are present?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.140625, "loss": 2.325402021408081 }, { "id": "identity_20232", "image": "GeoQA+/images/2890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,边长为4的正方形ABCD中有一个小正方形EFGH,其中E、F、G分别在AB、BC、FD上,若BF=1,则小正方形的边长为()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{4}{5}\n(C) \\frac{5}{4}\n(D) \\frac{5}{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.453125, "loss": 2.911529541015625 }, { "id": "identity_247194", "image": "TextVQA/images/441a018f395f1cf0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophies are there in the picture, and what is the total number of 1st, 2nd, and 3rd place trophies?" }, { "from": "gpt", "value": "The answer is 9 trophies, 3 place trophies" } ], "clip_score": 28.515625, "loss": 2.2292532920837402 }, { "id": "identity_313467", "image": "VizWiz/images/VizWiz_train_00002660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the writing on these buttons and which one is which?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 23.0, "loss": 3.3012869358062744 }, { "id": "identity_334470", "image": "ChartQA/images/OECD_INDUSTRIAL_PRODUCTION_COL_DEU_EST_PRT_USA_000001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is shown by this green bar?" }, { "from": "gpt", "value": "The answer is Estonia" } ], "clip_score": 24.453125, "loss": 3.503816604614258 }, { "id": "identity_216276", "image": "A-OKVQA/images/235237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light for the vehicles in the foreground?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.078125, "loss": 2.852785110473633 }, { "id": "identity_258864", "image": "Super-CLEVR/images/superCLEVR_new_001838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the rubber object that is in front of the object behind the tiny metal motorbike, and what is the shape of the object that is to the left of the rubber object?" }, { "from": "gpt", "value": "The answer is Green, scooter" } ], "clip_score": 29.0, "loss": 3.858515739440918 }, { "id": "identity_169268", "image": "IconQA/images/67116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the opening of the bell?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 28.75, "loss": 3.568403482437134 }, { "id": "identity_25328", "image": "FigureQA/images/46507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.28125, "loss": 3.953664779663086 }, { "id": "identity_28172", "image": "A-OKVQA/images/244088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structure can be seen here?\nChoices:\n(A) ramparts\n(B) portcullis\n(C) roof\n(D) drawbridge" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.84375, "loss": 4.620110988616943 }, { "id": "identity_25221", "image": "TabMWP/images/17468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Polly need to buy 4 toothbrushes?" }, { "from": "gpt", "value": "The answer is 6.56" } ], "clip_score": 23.015625, "loss": 2.312331199645996 }, { "id": "identity_206704", "image": "VQA2.0/images/390345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it in the photo?" }, { "from": "gpt", "value": "The answer is Early morning or late afternoon" } ], "clip_score": 16.953125, "loss": 3.865778684616089 }, { "id": "identity_267481", "image": "DocVQA/images/fywn0226_28.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total number of primary Hip procedures performed in the year 2008/09?" }, { "from": "gpt", "value": "The answer is 69,888" } ], "clip_score": 27.6875, "loss": 2.1960089206695557 }, { "id": "identity_247022", "image": "GeoQA+/images/5844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 在△ABC中, D、E为边AB、AC的中点, 已知△ADE的面积为4, 求△BDE的面积和△ABC的面积之和是多少?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.5, "loss": 2.502760648727417 }, { "id": "identity_257635", "image": "IconQA/images/46844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Arrange the activities Janet did on Tuesday before bed in chronological order and calculate the total time spent on these activities if each activity took 30 minutes" }, { "from": "gpt", "value": "The answer is 90 minutes" } ], "clip_score": 32.3125, "loss": 2.403097629547119 }, { "id": "identity_30782", "image": "GeoQA+/images/3543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知直线a∥b∥c,直线m交直线a,b,c于点A,B,C,直线n交直线a,b,c于点D,E,F,若\\frac{AB}{BC}=\\frac{1}{2},则\\frac{DE}{EF}=()\nChoices:\n(A) \\frac{1}{3}\n(B) \\frac{1}{2}\n(C) \\frac{2}{3}\n(D) 1" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.921875, "loss": 2.3338818550109863 }, { "id": "identity_119984", "image": "VizWiz/images/VizWiz_train_00023749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many points does \"Level 4\" have?" }, { "from": "gpt", "value": "The answer is 3,650 points" } ], "clip_score": 21.890625, "loss": 1.2967880964279175 }, { "id": "identity_179443", "image": "TabMWP/images/30456.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much does it cost to buy one of each item?" }, { "from": "gpt", "value": "The answer is $2.87" } ], "clip_score": 30.125, "loss": 1.8838931322097778 }, { "id": "identity_261437", "image": "AI2D/images/2913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the soft, fleshy fin located on the back of a fish, positioned behind the dorsal fin and just in front of the caudal fin?\nChoices:\n(A) lateral line\n(B) preopercle\n(C) dorsal fin\n(D) adipose fin" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.28125, "loss": 3.6855628490448 }, { "id": "identity_145332", "image": "FigureQA/images/67230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Rosy Brown on the y-axis label?" }, { "from": "gpt", "value": "The answer is 96.55" } ], "clip_score": 28.90625, "loss": 2.391667127609253 }, { "id": "identity_7254", "image": "TabMWP/images/37769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Wesley has $251.00. How much money will Wesley have left if he buys a bat signed by a famous baseball player and a photograph signed by a famous movie star?" }, { "from": "gpt", "value": "The answer is 99" } ], "clip_score": 28.546875, "loss": 3.173663854598999 }, { "id": "identity_184252", "image": "PMC-VQA/images/PMC4581082_Fig2_426468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are pointing to features within the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.5, "loss": 2.4403421878814697 }, { "id": "identity_329486", "image": "IconQA/images/104250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Move the ruler to measure the length of this to the nearest inch. This is about (_) inches long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.28125, "loss": 2.4642810821533203 }, { "id": "identity_316930", "image": "GeoQA+/images/2200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,某地政府准备修建一座高AB=6m的过街天桥。已知天桥的坡面AC与地面BC的夹角为∠ACB,且sin∠ACB=\\frac{3}{5}。则坡面的AC的长度为()\n选项:\n(A) 6√{3}m\n(B) 10m\n(C) 8m\n(D) 7m" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.21875, "loss": 3.0386669635772705 }, { "id": "identity_221916", "image": "PMC-VQA/images/PMC8128441_fig4_443760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What gene is associated with the cell in the image?" }, { "from": "gpt", "value": "The answer is Atg9" } ], "clip_score": 24.8125, "loss": 2.3868327140808105 }, { "id": "identity_106944", "image": "Super-CLEVR/images/superCLEVR_new_001672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly to the right of the purple bus?" }, { "from": "gpt", "value": "The answer is A pickup truck" } ], "clip_score": 26.375, "loss": 3.084218740463257 }, { "id": "identity_213180", "image": "A-OKVQA/images/519850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the background?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 13.0234375, "loss": 2.515148162841797 }, { "id": "identity_71601", "image": "ScienceQA/images/18060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geological features dominate the landscape of Peary Land?" }, { "from": "gpt", "value": "The answer is Mountains and rocky terrain" } ], "clip_score": 27.03125, "loss": 2.661190986633301 }, { "id": "identity_39321", "image": "FigureQA/images/36751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is The title is \"title" } ], "clip_score": 36.03125, "loss": 4.088164329528809 }, { "id": "identity_285721", "image": "A-OKVQA/images/340594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common item do the two individuals at each end of the table possess?\nChoices:\n(A) glasses\n(B) coats\n(C) hats\n(D) backpacks" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.0625, "loss": 4.153107643127441 }, { "id": "identity_103370", "image": "PMC-VQA/images/PMC9172626_goac023-F1_303318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the tumor compared to the surrounding tissue?" }, { "from": "gpt", "value": "The answer is Isodense" } ], "clip_score": 30.03125, "loss": 3.160876750946045 }, { "id": "identity_276355", "image": "IconQA/images/9723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of dots?\nChoices:\n(A) 3\n(B) 4\n(C) 2\n(D) 1\n(E) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.21875, "loss": 3.3327553272247314 }, { "id": "identity_332161", "image": "VQA2.0/images/428788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many police vehicles are present here?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.421875, "loss": 2.2170937061309814 }, { "id": "identity_309043", "image": "TQA/images/question_images/human_system_eye_6124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost structure at the front?\nChoices:\n(A) Sclera\n(B) Cornea\n(C) Iris\n(D) Lens" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.390625, "loss": 4.30801248550415 }, { "id": "identity_292529", "image": "A-OKVQA/images/316363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which object does the largest kite flying resemble, out of the following choices: moth, cell phone, hummingbird, or human?\nChoices:\n(A) moth\n(B) cell phone\n(C) hummingbird\n(D) human" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.75, "loss": 3.872861385345459 }, { "id": "identity_254668", "image": "IconQA/images/97204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many diamonds are there in two rows of the same length as the given row?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 27.3125, "loss": 2.1173999309539795 }, { "id": "identity_56235", "image": "FigureQA/images/33540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of Dark Turquoise in the pie chart?" }, { "from": "gpt", "value": "The answer is approximately 0.56" } ], "clip_score": 36.90625, "loss": 2.2974636554718018 }, { "id": "identity_46274", "image": "DocVQA/images/ylvw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the status of the Totelle Monograph 1 and 2mg project?" }, { "from": "gpt", "value": "The answer is Draft manuscript submitted for review 1/2/02" } ], "clip_score": 25.0625, "loss": 2.9842679500579834 }, { "id": "identity_176534", "image": "IconQA/images/71221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you color one shape red, how many will remain uncolored?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.796875, "loss": 2.0965864658355713 }, { "id": "identity_286777", "image": "ScienceQA/images/6360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the accuracy of the statement regarding our solar system:\nAmong the four largest planets, is it true or false that three are primarily composed of gas?\nOptions:\n(A) true\n(B) false" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 3.4508233070373535 }, { "id": "identity_315264", "image": "A-OKVQA/images/468953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they talking about?\nChoices:\n(A) traffic\n(B) gas cost\n(C) plans date\n(D) gun cost" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.703125, "loss": 4.3034348487854 }, { "id": "identity_92884", "image": "IconQA/images/105972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the hour and minute hands of the clock?" }, { "from": "gpt", "value": "The answer is 165 degrees" } ], "clip_score": 24.59375, "loss": 2.302898645401001 }, { "id": "identity_295994", "image": "FigureQA/images/6705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in value, Web Purple or Light Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.46875, "loss": 4.532818794250488 }, { "id": "identity_70026", "image": "ScienceQA/images/6520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which battery experienced a greater temperature change?" }, { "from": "gpt", "value": "The answer is Battery in the less expensive phone" } ], "clip_score": 24.84375, "loss": 1.312889814376831 }, { "id": "identity_192476", "image": "PMC-VQA/images/PMC6476123_fig2_469077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is shown in this image?" }, { "from": "gpt", "value": "The answer is Lumbar spine" } ], "clip_score": 28.9375, "loss": 2.824854850769043 }, { "id": "identity_42194", "image": "DVQA/images/bar_train_00000502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest accuracy value for the 'data' dataset?" }, { "from": "gpt", "value": "The answer is wit" } ], "clip_score": 30.578125, "loss": 4.459616184234619 }, { "id": "identity_99723", "image": "IconQA/images/40688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the shape is made of wireframe and each cm of wire costs $0.10, how much would the wire for the top hexagon cost?" }, { "from": "gpt", "value": "The answer is $1.20" } ], "clip_score": 28.796875, "loss": 1.3201838731765747 }, { "id": "identity_175239", "image": "TabMWP/images/9886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of hammers sold over the past 7 days?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 37.28125, "loss": 2.731351137161255 }, { "id": "identity_217844", "image": "A-OKVQA/images/390539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible side-view mirrors are on the motorcycle?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.28125, "loss": 4.173684597015381 }, { "id": "identity_79123", "image": "UniGeo/images/1084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABCD内接于⊙O,已知∠A=50°,求∠ABD的度数为()" }, { "from": "gpt", "value": "The answer is 130°" } ], "clip_score": 32.59375, "loss": 1.5803204774856567 }, { "id": "identity_268046", "image": "ScienceQA/images/12385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image? \nOptions:\n(A) West Virginia\n(B) Iowa\n(C) Rhode Island\n(D) New York" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.03125, "loss": 4.629692554473877 }, { "id": "identity_183942", "image": "A-OKVQA/images/396958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation infrastructure is depicted in the image?" }, { "from": "gpt", "value": "The answer is Train station" } ], "clip_score": 28.890625, "loss": 3.557168960571289 }, { "id": "identity_287025", "image": "IconQA/images/46307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the height of the cactus in terms of the number of cubes?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.296875, "loss": 2.2940118312835693 }, { "id": "identity_259026", "image": "A-OKVQA/images/68956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary colors used in the exterior design of Rita's building, and what type of food does Rita's sell at the bottom of this picture?" }, { "from": "gpt", "value": "The answer is Blue, white, red, and ice cream" } ], "clip_score": 26.71875, "loss": 3.0882935523986816 }, { "id": "identity_2762", "image": "MapQA/images/map_3024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the map have missing data ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.59375, "loss": 3.060922145843506 }, { "id": "identity_153730", "image": "ScienceQA/images/5731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the east?" }, { "from": "gpt", "value": "The answer is Montana" } ], "clip_score": 31.09375, "loss": 3.222073793411255 }, { "id": "identity_182866", "image": "A-OKVQA/images/246183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella held by the person in the center?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 18.875, "loss": 2.948676824569702 }, { "id": "identity_2900", "image": "FigureQA/images/39544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.046875, "loss": 3.7636256217956543 }, { "id": "identity_192575", "image": "A-OKVQA/images/121651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the image?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 22.6875, "loss": 2.18679141998291 }, { "id": "identity_130987", "image": "A-OKVQA/images/229936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is it currently on the clock?" }, { "from": "gpt", "value": "The answer is Ten past three" } ], "clip_score": 14.9140625, "loss": 3.964447498321533 }, { "id": "identity_308006", "image": "TextVQA/images/ac069e046487a0c9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What name for a body of open water is on the blue background behind the players' benches?" }, { "from": "gpt", "value": "The answer is ocean" } ], "clip_score": 19.34375, "loss": 3.793778896331787 }, { "id": "identity_99101", "image": "IconQA/images/58136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the sequence continues, what will be the image in the sixth position?" }, { "from": "gpt", "value": "The answer is Paw" } ], "clip_score": 21.953125, "loss": 2.8405160903930664 }, { "id": "identity_275730", "image": "TabMWP/images/30316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the schedule provided, at what time does the sporting group competition conclude?\nChoices:\n(A) 7:55 P.M.\n(B) 3:00 P.M.\n(C) 5:10 P.M.\n(D) 6:30 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.484375, "loss": 3.51884388923645 }, { "id": "identity_30374", "image": "PMC-VQA/images/PMC4033054_F4_291575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain imaging technique was used to generate this image? \nChoices:\n(A) Positron Emission Tomography (PET) \n(B) Electroencephalography (EEG) \n(C) Magnetoencephalography (MEG) \n(D) Magnetic Resonance Imaging (MRI) " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.359375, "loss": 4.131989479064941 }, { "id": "identity_235321", "image": "ScienceQA/images/15793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the mammal in the image and describe two distinct characteristics that classify it as a mammal" }, { "from": "gpt", "value": "The answer is Kangaroo, mammary glands, fur" } ], "clip_score": 31.9375, "loss": 3.147876024246216 }, { "id": "identity_114400", "image": "TextVQA/images/005b92c904f041ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen standing under the tent?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 15.8515625, "loss": 5.090975284576416 }, { "id": "identity_179270", "image": "CLEVR-Math/images/CLEVR_train_016512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 2 green matte objects. How many green objects exist?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.734375, "loss": 2.2303593158721924 }, { "id": "identity_77165", "image": "AI2D/images/2125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the image is a secondary consumer?" }, { "from": "gpt", "value": "The answer is Owl" } ], "clip_score": 24.90625, "loss": 2.341097593307495 }, { "id": "identity_42067", "image": "DVQA/images/bar_train_00194527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store category has the lowest sales for the item \"melody\"?" }, { "from": "gpt", "value": "The answer is draft" } ], "clip_score": 33.21875, "loss": 3.5788416862487793 }, { "id": "identity_15684", "image": "ScienceQA/images/3684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of rock is quartzite?\nChoices:\n(A) sedimentary\n(B) metamorphic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.1875, "loss": 3.931323289871216 }, { "id": "identity_111411", "image": "VQA2.0/images/390524.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites have a predominantly red color?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.6875, "loss": 2.4463062286376953 }, { "id": "identity_333230", "image": "AI2D/images/837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many antennas does this insect have?\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.796875, "loss": 3.2944424152374268 }, { "id": "identity_30447", "image": "TextVQA/images/3406cf27d690c653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on the side of the taxi on the left side of this picture?" }, { "from": "gpt", "value": "The answer is nyc" } ], "clip_score": 30.671875, "loss": 2.473546266555786 }, { "id": "identity_47741", "image": "MapQA/images/map_11051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the state of Florida fall within the second lowest value range for masses?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 27.984375, "loss": 3.0731847286224365 }, { "id": "identity_298011", "image": "VQA2.0/images/348826.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of individuals in close proximity to the airplane?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.265625, "loss": 2.1664836406707764 }, { "id": "identity_205187", "image": "A-OKVQA/images/204273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement on the backstop behind the catcher?" }, { "from": "gpt", "value": "The answer is Northwestern Medicine" } ], "clip_score": 23.578125, "loss": 3.4485607147216797 }, { "id": "identity_206242", "image": "PMC-VQA/images/PMC5500524_Fig2_187345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar measurement in the image?" }, { "from": "gpt", "value": "The answer is 50 micrometers" } ], "clip_score": 23.890625, "loss": 1.9411303997039795 }, { "id": "identity_232496", "image": "VQA-AS/images/15328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How far is the fireplace from the right window compared to the left window?" }, { "from": "gpt", "value": "The answer is The fireplace is equidistant from both windows" } ], "clip_score": 31.625, "loss": 2.1436846256256104 }, { "id": "identity_112501", "image": "Super-CLEVR/images/superCLEVR_new_001425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the green object to the left of the fighter jet a motorcycle?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.40625, "loss": 3.835178852081299 }, { "id": "identity_327319", "image": "PMC-VQA/images/PMC7764864_nanomaterials-10-02504-f007_328944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which images show greater magnification?\nChoices:\n(A) a and b\n(B) b and c\n(C) c and d\n(D) d and e" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.84375, "loss": 4.04060173034668 }, { "id": "identity_202174", "image": "A-OKVQA/images/463533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pitcher's team uniform?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 25.890625, "loss": 3.9010658264160156 }, { "id": "identity_138916", "image": "MapQA/images/map_7090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of pickers between 48.8% and 49.6%?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 34.875, "loss": 4.266054153442383 }, { "id": "identity_227834", "image": "DocVQA/images/fxwg0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different brands and product types are listed under the \"CORE REQUIREMENTS\" and \"ADDITIONAL REQUIREMENTS\" sections combined?" }, { "from": "gpt", "value": "The answer is 14 brands and 55 product types" } ], "clip_score": 18.234375, "loss": 1.8726719617843628 }, { "id": "identity_107568", "image": "TextVQA/images/23b6b8e23bbef788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the subdials on the watch?" }, { "from": "gpt", "value": "The answer is Chronograph and fractional second measurement" } ], "clip_score": 29.765625, "loss": 4.177751541137695 }, { "id": "identity_331041", "image": "A-OKVQA/images/333978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What carbonated drink is in the green container on the right?\nChoices:\n(A) sprite\n(B) mountain dew\n(C) 7-up\n(D) schweppes" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.359375, "loss": 3.8511805534362793 }, { "id": "identity_261232", "image": "GeoQA+/images/2359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,AB是⊙O的直径,点C为⊙O外一点,CA,CD是⊙O的切线,A,D为切点,连接BD,AD.若∠ACD=30°,求∠BAD的大小。" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 22.59375, "loss": 1.4949932098388672 }, { "id": "identity_169714", "image": "IconQA/images/28875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares are there without a heart?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.0625, "loss": 2.3153092861175537 }, { "id": "identity_142987", "image": "DocVQA/images/kgxc0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the AIN annual subscription rate?" }, { "from": "gpt", "value": "The answer is $8.50" } ], "clip_score": 22.53125, "loss": 1.93085777759552 }, { "id": "identity_140638", "image": "DocVQA/images/qlhx0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the publication titled \"Vasectomy: Effect on the ductuli efferentes\"?" }, { "from": "gpt", "value": "The answer is Alexander, N. J" } ], "clip_score": 17.828125, "loss": 4.269598007202148 }, { "id": "identity_224934", "image": "ScienceQA/images/4763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and name its capital city" }, { "from": "gpt", "value": "The answer is Santo Domingo" } ], "clip_score": 27.984375, "loss": 3.1292624473571777 }, { "id": "identity_81894", "image": "UniGeo/images/1528.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of segment AB is 4 units, what is the length of segment BC?" }, { "from": "gpt", "value": "The answer is 8 units" } ], "clip_score": 30.421875, "loss": 2.309566020965576 }, { "id": "identity_224965", "image": "PMC-VQA/images/PMC8815180_Fig2_184364.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure being ligated and describe its anatomical significance and function" }, { "from": "gpt", "value": "The answer is Short hepatic veins drain blood from liver to inferior vena cava" } ], "clip_score": 29.015625, "loss": 2.8715829849243164 }, { "id": "identity_154986", "image": "ScienceQA/images/17463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Gabrielle and Patty were to trade items, how many different trades could they make?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 21.75, "loss": 2.9087657928466797 }, { "id": "identity_167501", "image": "TabMWP/images/8933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many superheroes have the ability to fly regardless of their personality flaw?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.390625, "loss": 2.9412574768066406 }, { "id": "identity_334398", "image": "TextVQA/images/e4b0aa6724468e78.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many short novels are in this book?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 16.875, "loss": 2.550039768218994 }, { "id": "identity_183413", "image": "TextVQA/images/009817f6e8bcd068.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car's body?" }, { "from": "gpt", "value": "The answer is Yellow and white" } ], "clip_score": 25.953125, "loss": 3.088946580886841 }, { "id": "identity_126551", "image": "PMC-VQA/images/PMC6635723_F2_9840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is depicted in this image?" }, { "from": "gpt", "value": "The answer is PET-CT scan" } ], "clip_score": 31.640625, "loss": 2.730194091796875 }, { "id": "identity_259666", "image": "CLEVR-Math/images/CLEVR_train_023263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After removing all gray blocks, how many objects are left in total, and how many of them are cylinders?" }, { "from": "gpt", "value": "The answer is 7 objects in total, 5 of which are cylinders" } ], "clip_score": 29.9375, "loss": 1.5126218795776367 }, { "id": "identity_210370", "image": "VQA2.0/images/330907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person doing under the umbrella?" }, { "from": "gpt", "value": "The answer is Reading" } ], "clip_score": 20.921875, "loss": 3.7020301818847656 }, { "id": "identity_91077", "image": "GeoQA+/images/2534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the circle, if chords AB and CD intersect at E and angle A is 40 degrees, what is the measure of angle DEC?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 36.84375, "loss": 1.6801620721817017 }, { "id": "identity_84072", "image": "UniGeo/images/3034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given image, if triangle ABC is rotated clockwise around point A by 60 degrees to form triangle ADE, what is the measure of angle CAD?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 33.78125, "loss": 1.7507927417755127 }, { "id": "identity_226650", "image": "VQA2.0/images/26833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the items on the table, and how many of each type are there?" }, { "from": "gpt", "value": "The answer is 3 bowls, 5 glasses, 4 utensils" } ], "clip_score": 20.5, "loss": 2.285862922668457 }, { "id": "identity_87400", "image": "Geometry3K/images/1073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the entire figure?" }, { "from": "gpt", "value": "The answer is 12 + 12√2" } ], "clip_score": 32.90625, "loss": 2.4163501262664795 }, { "id": "identity_295000", "image": "PMC-VQA/images/PMC6855902_FIG4_68470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the red arrow in the CT scan pointing to? \nChoices:\n(A) Lack of bone union \n(B) Existence of bone union \n(C) Presence of inflammation \n(D) Indication of infection" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.421875, "loss": 5.291035175323486 }, { "id": "identity_31782", "image": "AI2D/images/12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The drying sponge cactus in diagram A is covered with\nChoices:\n(A) Wax paper\n(B) Cellophane\n(C) Water\n(D) Glue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.21875, "loss": 3.778064727783203 }, { "id": "identity_132876", "image": "VizWiz/images/VizWiz_train_00018984.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bottle cap?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 23.28125, "loss": 3.088588237762451 }, { "id": "identity_239092", "image": "VQA2.0/images/267697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pairs of animals can be formed from the animals present in the image?" }, { "from": "gpt", "value": "The answer is 66" } ], "clip_score": 21.859375, "loss": 2.9284820556640625 }, { "id": "identity_160783", "image": "UniGeo/images/2602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if ∠A = 50°, what is the measure of ∠BCD?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 34.875, "loss": 1.5828548669815063 }, { "id": "identity_163190", "image": "GeoQA+/images/4773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If triangle ABC has an area of 12 and AD is the median, what is the length of BD if AD = 8?" }, { "from": "gpt", "value": "The answer is 3sqrt(3)" } ], "clip_score": 34.5, "loss": 1.7393224239349365 }, { "id": "identity_36011", "image": "PMC-VQA/images/PMC9088844_F1_276123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the patient in the figure showing a single or multiple bony abnormalities? \nChoices:\n(A) Single \n(B) Multiple \n(C) Absent \n(D) Unknown " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.96875, "loss": 4.040863513946533 }, { "id": "identity_125896", "image": "VQA2.0/images/60362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the building in the image?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 30.234375, "loss": 2.978409767150879 }, { "id": "identity_325664", "image": "DVQA/images/bar_train_00088904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this group of bars?" }, { "from": "gpt", "value": "The answer is auto" } ], "clip_score": 18.71875, "loss": 4.494360446929932 }, { "id": "identity_167870", "image": "TabMWP/images/31545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average temperature at noon over the 7 days?" }, { "from": "gpt", "value": "The answer is approximately 7.14°C" } ], "clip_score": 31.765625, "loss": 2.0799009799957275 }, { "id": "identity_145331", "image": "FigureQA/images/67230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Mint have a consistent value across the x-axis label?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.234375, "loss": 2.7407052516937256 }, { "id": "identity_257531", "image": "VQA2.0/images/260050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trucks and buses are visible in the image, and what are their colors?" }, { "from": "gpt", "value": "The answer is Three trucks (white, white, brown) and two buses (yellow, yellow)" } ], "clip_score": 25.734375, "loss": 1.7260571718215942 }, { "id": "identity_310745", "image": "TabMWP/images/8303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The transportation company tracked the number of train tickets sold in the past 4 days. On which day were the most tickets sold?\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 41.09375, "loss": 3.857434034347534 }, { "id": "identity_79589", "image": "Geometry3K/images/1684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 4?" }, { "from": "gpt", "value": "The answer is 20 degrees" } ], "clip_score": 33.625, "loss": 2.183901309967041 }, { "id": "identity_94540", "image": "CLEVR-Math/images/CLEVR_train_049815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many spheres are there in total?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.796875, "loss": 2.4617745876312256 }, { "id": "identity_101216", "image": "CLEVR-Math/images/CLEVR_train_002909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of green objects to yellow objects?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 27.34375, "loss": 1.5313667058944702 }, { "id": "identity_258817", "image": "PMC-VQA/images/PMC8078316_F1_429190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the implications of T2 hyperintensity throughout the spinal cord on the patient's neurological function and potential treatment options?" }, { "from": "gpt", "value": "The answer is Neurological deficits, multiple sclerosis, transverse myelitis, or tumors" } ], "clip_score": 31.796875, "loss": 2.176800012588501 }, { "id": "identity_332017", "image": "PMC-VQA/images/PMC2410174_fig1_23923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are being compared to demonstrate the reduction in tumor volume in patient #5?\nChoices:\n(A) T1W images\n(B) FLAIR images\n(C) DWI images\n(D) T2W-FSE images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.125, "loss": 4.290466785430908 }, { "id": "identity_56691", "image": "DocVQA/images/xglg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contract number mentioned in the document?" }, { "from": "gpt", "value": "The answer is DA-49-007-ND-749" } ], "clip_score": 20.25, "loss": 3.143643617630005 }, { "id": "identity_241121", "image": "UniGeo/images/2076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知△ABC的面积为12,点D在线段AC上,点F在线段BC的延长线上,且BC=4CF,四边形DCFE是平行四边形,若点E是线段BC的中点,则图中阴影部分的面积为多少?" }, { "from": "gpt", "value": "The answer is 1.5" } ], "clip_score": 27.515625, "loss": 1.9364895820617676 }, { "id": "identity_270801", "image": "DocVQA/images/krlf0023_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the job title of the individual who signed on behalf of the Retailer Side?" }, { "from": "gpt", "value": "The answer is Category Manager" } ], "clip_score": 18.71875, "loss": 3.430644989013672 }, { "id": "identity_108748", "image": "PMC-VQA/images/PMC6922484_F3_87489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of any abnormality in the cervical spine?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.15625, "loss": 2.7357354164123535 }, { "id": "identity_264272", "image": "MapQA/images/map_8846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the worth of Michigan?" }, { "from": "gpt", "value": "The answer is 13.3%-14.6%" } ], "clip_score": 24.1875, "loss": 1.9293310642242432 }, { "id": "identity_301826", "image": "TQA/images/abc_question_images/human_system_digestive_13702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents this organ?\nChoices:\n(A) T\n(B) E\n(C) S\n(D) V" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.625, "loss": 3.8082523345947266 }, { "id": "identity_197822", "image": "VQA-AS/images/18891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Sunny with some clouds" } ], "clip_score": 23.78125, "loss": 2.983248472213745 }, { "id": "identity_165631", "image": "Geometry3K/images/186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If m angle 1 is 73, what is the sum of the measures of all angles formed by lines m, l, and t?" }, { "from": "gpt", "value": "The answer is 292" } ], "clip_score": 30.03125, "loss": 2.35003399848938 }, { "id": "identity_210634", "image": "VQA-AS/images/16109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather condition is depicted in the image?" }, { "from": "gpt", "value": "The answer is Sunny" } ], "clip_score": 20.28125, "loss": 2.953512191772461 }, { "id": "identity_308144", "image": "DVQA/images/bar_train_00163233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this bar?" }, { "from": "gpt", "value": "The answer is grade" } ], "clip_score": 20.34375, "loss": 4.125132083892822 }, { "id": "identity_238313", "image": "ScienceQA/images/8695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image, and then list two major cities in that state" }, { "from": "gpt", "value": "The answer is Carson City, Las Vegas, Reno" } ], "clip_score": 29.734375, "loss": 1.8380883932113647 }, { "id": "identity_134289", "image": "DVQA/images/bar_train_00071711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value difference between 'plug' and 'week' for 'input'?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.28125, "loss": 2.8389432430267334 }, { "id": "identity_9116", "image": "VQA-AS/images/16118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is sitting next to the woman?" }, { "from": "gpt", "value": "The answer is cat" } ], "clip_score": 23.703125, "loss": 3.1512625217437744 }, { "id": "identity_324706", "image": "TextVQA/images/cda43c2602ceb129.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are these reserved for?" }, { "from": "gpt", "value": "The answer is staff" } ], "clip_score": 12.9453125, "loss": 3.2195823192596436 }, { "id": "identity_230015", "image": "UniGeo/images/2774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,C,D是⊙O上的两点,若∠BCD=28°,求∠ACD的度数。" }, { "from": "gpt", "value": "The answer is 56°" } ], "clip_score": 29.796875, "loss": 2.018157720565796 }, { "id": "identity_229914", "image": "IconQA/images/77318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines of symmetry does this picture have, and what are their orientations?" }, { "from": "gpt", "value": "The answer is One vertical line of symmetry" } ], "clip_score": 23.875, "loss": 4.095357894897461 }, { "id": "identity_72913", "image": "ScienceQA/images/21.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the image is not part of the food chain depicted?" }, { "from": "gpt", "value": "The answer is There is no organism outside the food chain depicted" } ], "clip_score": 26.890625, "loss": 2.15110445022583 }, { "id": "identity_274143", "image": "TextVQA/images/8c70eac1edc9b8be.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the lanyard?" }, { "from": "gpt", "value": "The answer is demo" } ], "clip_score": 15.7578125, "loss": 4.682217597961426 }, { "id": "identity_187606", "image": "PMC-VQA/images/PMC3682927_F1_211337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of pneumothorax in the given image?" }, { "from": "gpt", "value": "The answer is No evidence of pneumothorax" } ], "clip_score": 26.984375, "loss": 2.390062093734741 }, { "id": "identity_209925", "image": "A-OKVQA/images/59015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the barge?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.15625, "loss": 3.9940788745880127 }, { "id": "identity_144742", "image": "DocVQA/images/mmpg0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cartons of Winston Box are to be shipped?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 25.96875, "loss": 1.615789771080017 }, { "id": "identity_135101", "image": "DVQA/images/bar_train_00005221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of people who prefer 'actor' across all categories?" }, { "from": "gpt", "value": "The answer is 370%" } ], "clip_score": 30.203125, "loss": 2.5889461040496826 }, { "id": "identity_189660", "image": "A-OKVQA/images/100827.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue umbrellas are there in the outdoor seating area?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 26.0625, "loss": 2.439349412918091 }, { "id": "identity_327072", "image": "TextVQA/images/015329f53f79d0fc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which mortgage company is a sponsor?" }, { "from": "gpt", "value": "The answer is ameriquest" } ], "clip_score": 15.0078125, "loss": 2.861257314682007 }, { "id": "identity_275721", "image": "Geometry3K/images/917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diameters of circles A, B, and C are 8 inches, 18 inches, and 11 inches, respectively. Calculate the value of F G.\nOptions:\n(A) 7\n(B) 8\n(C) 14\n(D) 16" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.28125, "loss": 1.9481884241104126 }, { "id": "identity_80792", "image": "GeoQA+/images/2953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that O is the midpoint of the diagonal AC in rectangle ABCD, and M is the midpoint of side CD, if AB = 12 and OM = 4.5, what is the length of diagonal AC?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 35.3125, "loss": 2.2053463459014893 }, { "id": "identity_146236", "image": "DocVQA/images/pkxg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bureau is Catherine M. Leamy associated with?" }, { "from": "gpt", "value": "The answer is Children's Bureau" } ], "clip_score": 23.40625, "loss": 2.037923812866211 }, { "id": "identity_267912", "image": "GeoQA+/images/5225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In right triangle ABC, with angle BAC equal to 90 degrees, AB measuring 6 units, and AC measuring 8 units, point P is a moving point on the hypotenuse BC. Point E is the foot of the perpendicular from P to AB, and point F is the foot of the perpendicular from P to AC. If EF intersects AP at point O, what is the minimum value of OF?\nChoices:\n(A) 4.8\n(B) 1.2\n(C) 3.6\n(D) 2.4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.796875, "loss": 2.234724283218384 }, { "id": "identity_238505", "image": "DVQA/images/bar_train_00091993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the values of the largest and smallest bars?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 23.859375, "loss": 1.914493203163147 }, { "id": "identity_141569", "image": "DVQA/images/bar_train_00094454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms are compared in this graph?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.9375, "loss": 2.4095115661621094 }, { "id": "identity_3883", "image": "Geometry3K/images/781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find m \\angle 5.\nChoices:\n(A) 28.5\n(B) 56\n(C) 57\n(D) 123" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.46875, "loss": 3.364880323410034 }, { "id": "identity_242455", "image": "DVQA/images/bar_train_00123955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the items wood and song across all stores, and which item has the higher total sales?" }, { "from": "gpt", "value": "The answer is wood" } ], "clip_score": 31.296875, "loss": 4.268998146057129 }, { "id": "identity_99044", "image": "IconQA/images/85374.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper clips are linked together?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.078125, "loss": 2.1870150566101074 }, { "id": "identity_161269", "image": "Geometry3K/images/1203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of the two acute angles in the triangle?" }, { "from": "gpt", "value": "The answer is 89 degrees" } ], "clip_score": 34.46875, "loss": 2.0991082191467285 }, { "id": "identity_119161", "image": "A-OKVQA/images/401071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible signage or branding on the umbrellas?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.578125, "loss": 3.0235636234283447 }, { "id": "identity_146620", "image": "ChartQA/images/12050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of unfavorable views for Taliban over the years shown in the graph?" }, { "from": "gpt", "value": "The answer is 63.57%" } ], "clip_score": 29.53125, "loss": 1.9618502855300903 }, { "id": "identity_58146", "image": "PlotQA/images/46532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much percent did the labor force participation for ages 50+ change from 2011 to 2014?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 36.40625, "loss": 1.942956805229187 }, { "id": "identity_321063", "image": "PMC-VQA/images/PMC5664813_Fig5_233716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image?\nChoices:\n(A) A ripe wheat spike\n(B) A wheat grain\n(C) Isolated wheat grains\n(D) A wheat seed" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.140625, "loss": 4.524722576141357 }, { "id": "identity_235466", "image": "ScienceQA/images/7599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state that is located directly south of Pennsylvania and directly north of Virginia?" }, { "from": "gpt", "value": "The answer is Annapolis" } ], "clip_score": 26.53125, "loss": 2.861387252807617 }, { "id": "identity_46701", "image": "FigureQA/images/55909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Dark Turquoise bar taller than the Maroon bar?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.609375, "loss": 3.58546781539917 }, { "id": "identity_55792", "image": "DocVQA/images/xfjv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What position did George E. Wilber, Jr. hold at The Great Western Sugar Co.?" }, { "from": "gpt", "value": "The answer is President" } ], "clip_score": 20.0625, "loss": 3.1598174571990967 }, { "id": "identity_57048", "image": "DVQA/images/bar_train_00088584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people who prefer 'strain' in the 'regard' category?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 29.078125, "loss": 1.7646551132202148 }, { "id": "identity_170959", "image": "IconQA/images/37468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the hour and minute hands of the clock?" }, { "from": "gpt", "value": "The answer is 270 degrees" } ], "clip_score": 26.453125, "loss": 2.365478754043579 }, { "id": "identity_185039", "image": "PMC-VQA/images/PMC3820302_fig12_241670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar length at the bottom of the image?" }, { "from": "gpt", "value": "The answer is 3mm" } ], "clip_score": 14.15625, "loss": 2.641368865966797 }, { "id": "identity_39820", "image": "FigureQA/images/65025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does any color have a value below 90 on the y-axis label?" }, { "from": "gpt", "value": "The answer is Yes, Light Coral" } ], "clip_score": 28.234375, "loss": 2.7816991806030273 }, { "id": "identity_171955", "image": "TabMWP/images/28409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a family of two adults and two children visit the Bronx Zoo, how much will they spend on tickets in total?" }, { "from": "gpt", "value": "The answer is $52" } ], "clip_score": 31.25, "loss": 2.2358920574188232 }, { "id": "identity_136628", "image": "FigureQA/images/81293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does Lime Green appear above the 70 mark on the y-axis?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.703125, "loss": 2.090458631515503 }, { "id": "identity_321966", "image": "Geometry3K/images/1289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 1.5\n(B) 4\n(C) 6\n(D) 9" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.453125, "loss": 2.8381412029266357 }, { "id": "identity_186297", "image": "A-OKVQA/images/245598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the batter's helmet?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.953125, "loss": 3.479496479034424 }, { "id": "identity_25331", "image": "PMC-VQA/images/PMC7010902_Fig8_114208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe can we see the GRD in this patient? \nChoices:\n(A) Left temporal lobe \n(B) Right temporal lobe \n(C) Left parietal lobe \n(D) Right parietal lobe. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.640625, "loss": 4.100217819213867 }, { "id": "identity_301038", "image": "FigureQA/images/25881.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is one color greater than the other?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.703125, "loss": 3.8165817260742188 }, { "id": "identity_176333", "image": "IconQA/images/38349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale factor of the transformation applied to the letter 'Q'?" }, { "from": "gpt", "value": "The answer is 1.33" } ], "clip_score": 26.71875, "loss": 1.9171875715255737 }, { "id": "identity_184968", "image": "A-OKVQA/images/79333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building in the background?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 16.546875, "loss": 4.027058124542236 }, { "id": "identity_324476", "image": "TabMWP/images/34800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A manager monitored the wait times of their customers. What is the shortest wait time?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.03125, "loss": 3.2226529121398926 }, { "id": "identity_229965", "image": "ScienceQA/images/17102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the life cycle of a pine tree starting from the seed stage and ending at the fertilized cone stage, including the roles of male and female cones" }, { "from": "gpt", "value": "The answer is Seed, seedling, adult tree, male cone, female cone, fertilized cone" } ], "clip_score": 37.6875, "loss": 1.5678576231002808 }, { "id": "identity_301511", "image": "AI2D/images/1855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram of a food chain and your knowledge of science, a wolf is a/an?\n\nChoices:\n(A) carnivores\n(B) herbivores\n(C) omnivores\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.625, "loss": 3.9477906227111816 }, { "id": "identity_299277", "image": "CLEVR-Math/images/CLEVR_train_034253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 small yellow objects, how many small yellow objects remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.515625, "loss": 2.508222818374634 }, { "id": "identity_176453", "image": "IconQA/images/64728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the rectangle were to be cut into squares of 1 cm side, how many squares would be formed?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.390625, "loss": 2.3943092823028564 }, { "id": "identity_294209", "image": "A-OKVQA/images/265277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which material is utilized in constructing the runway? \n(A) coal\n(B) metal\n(C) cement\n(D) soil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.15625, "loss": 4.068020820617676 }, { "id": "identity_100756", "image": "TabMWP/images/35657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern in the increase of rubies as the number of bracelets increases?" }, { "from": "gpt", "value": "The answer is Increase by 5 rubies per bracelet" } ], "clip_score": 27.03125, "loss": 2.1266772747039795 }, { "id": "identity_138452", "image": "DVQA/images/bar_train_00096324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the x-axis?" }, { "from": "gpt", "value": "The answer is Values" } ], "clip_score": 28.3125, "loss": 3.2967138290405273 }, { "id": "identity_55724", "image": "DVQA/images/bar_train_00066326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more units of 'object' were sold in the 'twenty' store compared to the 'base' store?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.09375, "loss": 2.3988215923309326 }, { "id": "identity_59636", "image": "FigureQA/images/49115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by Magenta?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 20.359375, "loss": 2.7321603298187256 }, { "id": "identity_9774", "image": "FigureQA/images/93976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Lime Green less than Web Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.8125, "loss": 3.5541141033172607 }, { "id": "identity_30649", "image": "A-OKVQA/images/563371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Besides Asia what continent are these animals found on?\nChoices:\n(A) europe\n(B) antarctica\n(C) south america\n(D) africa" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.8125, "loss": 3.910308837890625 }, { "id": "identity_3164", "image": "VQA-RAD/images/synpic19114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do the densities in the lung fields represent?" }, { "from": "gpt", "value": "The answer is pleural plaques" } ], "clip_score": 28.828125, "loss": 3.7811532020568848 }, { "id": "identity_332224", "image": "A-OKVQA/images/327663.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the group of people being watched by authorities likely doing?\nChoices:\n(A) protesting\n(B) enlisting\n(C) dancing\n(D) shopping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.09375, "loss": 4.188119411468506 }, { "id": "identity_241445", "image": "FigureQA/images/78315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the maximum area under the curve, and what is the difference in area between this color and Dark Turquoise?" }, { "from": "gpt", "value": "The answer is Light Gold, difference is approximately 100 units" } ], "clip_score": 30.78125, "loss": 3.0917065143585205 }, { "id": "identity_260092", "image": "A-OKVQA/images/342408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible reasons the dog is mostly dry, and how does the presence of an umbrella contribute to this condition compared to the other options?" }, { "from": "gpt", "value": "The answer is The umbrella shields the dog from rain, keeping it mostly dry" } ], "clip_score": 32.46875, "loss": 2.382810592651367 }, { "id": "identity_57203", "image": "FigureQA/images/88010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the range of values on the y-axis for Light Gold greater than that of Dark Gold?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.953125, "loss": 3.553267240524292 }, { "id": "identity_155304", "image": "ScienceQA/images/7324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Chloe and Justin decide to trade items so each can have something they want, which items could they trade?" }, { "from": "gpt", "value": "The answer is Chloe could trade oranges for almonds" } ], "clip_score": 26.125, "loss": 2.5590240955352783 }, { "id": "identity_57084", "image": "DVQA/images/bar_train_00086839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a higher preference for 'office' than 'clock'?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.609375, "loss": 2.4996497631073 }, { "id": "identity_118039", "image": "Super-CLEVR/images/superCLEVR_new_001388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 22.78125, "loss": 2.2447378635406494 }, { "id": "identity_258931", "image": "TabMWP/images/242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of cars that went through the intersection per day over the three days?" }, { "from": "gpt", "value": "The answer is approximately 3,564.33 cars per day" } ], "clip_score": 40.21875, "loss": 1.9057154655456543 }, { "id": "identity_285130", "image": "TQA/images/question_images/earth_moon_phases_2531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the moon phase that follows the waxing crescent? Is it the New Moon, Full Moon, Waxing Gibbous, or First Quarter?\nChoices:\n(A) New Moon\n(B) Full Moon\n(C) Waxing Gibbous\n(D) First Quarter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.875, "loss": 3.6769752502441406 }, { "id": "identity_3167", "image": "FigureQA/images/19525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.921875, "loss": 4.498047828674316 }, { "id": "identity_209938", "image": "A-OKVQA/images/237919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface is the tennis being played on?" }, { "from": "gpt", "value": "The answer is Grass" } ], "clip_score": 25.65625, "loss": 2.7332656383514404 }, { "id": "identity_171363", "image": "IconQA/images/3534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many small cubes are missing from the fourth cube to make it a complete cube?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.625, "loss": 2.3667681217193604 }, { "id": "identity_4260", "image": "VQA2.0/images/316181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the safety net?" }, { "from": "gpt", "value": "The answer is on other side of field" } ], "clip_score": 17.375, "loss": 3.1973941326141357 }, { "id": "identity_305631", "image": "Super-CLEVR/images/superCLEVR_new_000617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bicycle the same size as the gray airliner near the motorbike?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 26.359375, "loss": 4.781798839569092 }, { "id": "identity_326801", "image": "PMC-VQA/images/PMC6262857_BIO038661F3_402705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated in green?\nChoices:\n(A) Gsh2 \n(B) Pax6 \n(C) DAPI \n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.375, "loss": 4.997471809387207 }, { "id": "identity_284976", "image": "TabMWP/images/27148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price difference between a lawn mower and a lawn sprinkler?" }, { "from": "gpt", "value": "The answer is 31" } ], "clip_score": 35.46875, "loss": 3.3613998889923096 }, { "id": "identity_51882", "image": "FigureQA/images/86089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Gold at x-axis 50 greater than the value of Dark Green at the same point?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.015625, "loss": 3.104252576828003 }, { "id": "identity_194031", "image": "A-OKVQA/images/462953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material are the majority of the suitcases made of?" }, { "from": "gpt", "value": "The answer is Leather" } ], "clip_score": 28.828125, "loss": 3.285278081893921 }, { "id": "identity_47529", "image": "MapQA/images/map_9852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the highest number of ladies according to the map?" }, { "from": "gpt", "value": "The answer is the name of the state" } ], "clip_score": 34.78125, "loss": 2.937575101852417 }, { "id": "identity_276008", "image": "TQA/images/question_images/cycle_nitrogen_6711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is responsible for producing organic matter?\n(A) fertilizers\n(B) plants and animals\n(C) farms and runoff\n(D) Fossil fuel emissions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.431473731994629 }, { "id": "identity_52251", "image": "MapQA/images/map_12214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the state of New York have a lower value range than the state of Pennsylvania?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.109375, "loss": 2.9391350746154785 }, { "id": "identity_9738", "image": "CLEVR-Math/images/CLEVR_train_041044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all cyan cylinders. Subtract all blue spheres. How many cylinders are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.09375, "loss": 2.25927472114563 }, { "id": "identity_157001", "image": "TQA/images/question_images/continental_drift_6652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which era is shown at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Present" } ], "clip_score": 19.859375, "loss": 3.031087636947632 }, { "id": "identity_76326", "image": "AI2D/images/71.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final destination of carbon after it goes through combustion according to the diagram?" }, { "from": "gpt", "value": "The answer is CO2 in atmosphere and dissolved in water" } ], "clip_score": 29.78125, "loss": 1.8019574880599976 }, { "id": "identity_331023", "image": "GeoQA+/images/4859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ACB中,∠ACB=100°,∠A=20°,D是AB上一点.将△ABC沿CD折叠,使点B落在AC边上的某处B′,则∠ADB′等于()\nChoices:\n(A) 25°\n(B) 30°\n(C) 35°\n(D) 40°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.859375, "loss": 2.650245189666748 }, { "id": "identity_148589", "image": "DocVQA/images/lfpf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the category of items listed under \"NON-ESSENTIALS\"?" }, { "from": "gpt", "value": "The answer is High in calories with limited other nutritional value" } ], "clip_score": 15.296875, "loss": 2.366039991378784 }, { "id": "identity_129388", "image": "VQA2.0/images/409949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person lying on the surfboard wearing a life jacket?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.84375, "loss": 3.4486641883850098 }, { "id": "identity_163624", "image": "GeoQA+/images/550.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the area of triangle ADE to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 1/9" } ], "clip_score": 35.53125, "loss": 1.979785680770874 }, { "id": "identity_278600", "image": "TextVQA/images/0f8a4d564b264981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which team is displayed at the top of the scoreboard?" }, { "from": "gpt", "value": "The answer is cardinals" } ], "clip_score": 24.78125, "loss": 3.8852245807647705 }, { "id": "identity_171486", "image": "TabMWP/images/7016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which days did Evelyn send the most e-mails?" }, { "from": "gpt", "value": "The answer is Friday and Saturday" } ], "clip_score": 34.46875, "loss": 2.1656031608581543 }, { "id": "identity_86333", "image": "UniGeo/images/3175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC with DE parallel to BC, if the perimeter of triangle ABC is 24 and AD = 2, BD = 3, what is the length of BC?" }, { "from": "gpt", "value": "The answer is 85/7" } ], "clip_score": 32.25, "loss": 3.042515516281128 }, { "id": "identity_42483", "image": "FigureQA/images/60238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value for Steel Blue?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 22.90625, "loss": 2.491582155227661 }, { "id": "identity_236112", "image": "AI2D/images/3768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which area is closest to the nasal cavity and also directly involved in the respiratory process?" }, { "from": "gpt", "value": "The answer is nasopharynx" } ], "clip_score": 34.5, "loss": 1.7265788316726685 }, { "id": "identity_128245", "image": "PMC-VQA/images/PMC7694624_diagnostics-10-00908-f001_307388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the middle image represent when compared to the first and last images?" }, { "from": "gpt", "value": "The answer is Fusion of CT and PET imaging" } ], "clip_score": 28.953125, "loss": 3.007075071334839 }, { "id": "identity_32250", "image": "A-OKVQA/images/242592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the colorful machines called?\nChoices:\n(A) garbage bins\n(B) mailboxes\n(C) vending machines\n(D) storage lockers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.59375, "loss": 4.516768455505371 }, { "id": "identity_80382", "image": "Geometry3K/images/1473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If m∠AFB = 8x - 6 and m∠BFC = 14x + 8, find the measure of ∠BFD where point D is on line AC extended" }, { "from": "gpt", "value": "The answer is 50.6 degrees" } ], "clip_score": 29.09375, "loss": 2.6283552646636963 }, { "id": "identity_19424", "image": "GeoQA+/images/3053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,定点C、动点D在⊙O上,并且位于直径AB的两侧,AB=5,AC=3,过点C在作CE⊥CD交DB的延长线于点E,则线段CE长度的最大值为()\nChoices:\n(A) 5\n(B) 8\n(C) \\frac{32}{5}\n(D) \\frac{20}{3}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.875, "loss": 2.83237886428833 }, { "id": "identity_16466", "image": "VQA2.0/images/81128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the man carrying a purse?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.21875, "loss": 4.407174110412598 }, { "id": "identity_46386", "image": "FigureQA/images/71755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by the Pale Green line?" }, { "from": "gpt", "value": "The answer is 56" } ], "clip_score": 24.421875, "loss": 3.163163423538208 }, { "id": "identity_298704", "image": "A-OKVQA/images/488244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what reason are the individuals parading on elephants through the streets?\nChoices:\n(A) to colonize\n(B) to destroy\n(C) to celebrate\n(D) to subdue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.046875, "loss": 4.255733489990234 }, { "id": "identity_36107", "image": "PMC-VQA/images/PMC6071436_rcr2357-fig-0001_349117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging test was done and where was the pulmonary cryptococcoma located? \nChoices:\n(A) MRI thorax with left lower lobe \n(B) X-ray thorax with left upper lobe \n(C) CT thorax with right upper lobe \n(D) PET scan thorax with right lower lobe " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.78125, "loss": 3.2453866004943848 }, { "id": "identity_62247", "image": "DocVQA/images/qybv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the event being advertised?" }, { "from": "gpt", "value": "The answer is Third Annual Institute of Alcohol and Drug Problems" } ], "clip_score": 33.0625, "loss": 1.1893846988677979 }, { "id": "identity_323490", "image": "PMC-VQA/images/PMC7801276_F1-ad-12-1-203_340304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the CT scan of the individual with cortical venous thrombosis reveal?\nChoices:\n(A) Hemorrhagic stroke in a temporal lobe\n(B) Venous hemorrhagic infarct in a parietal lobe\n(C) Disappearance of some cortical veins\n(D) Normal brain scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.84375, "loss": 4.784470081329346 }, { "id": "identity_215733", "image": "TextVQA/images/37b20486199a4dc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sport are these athletes playing?" }, { "from": "gpt", "value": "The answer is Softball" } ], "clip_score": 28.140625, "loss": 2.1020939350128174 }, { "id": "identity_299533", "image": "A-OKVQA/images/294758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of company is the sponsor of this game? \n(A) computer\n(B) car\n(C) basketball\n(D) canned bean" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.5546875, "loss": 4.341961860656738 }, { "id": "identity_292136", "image": "PMC-VQA/images/PMC2100056_F1_15102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the indication provided by the 18FDG-PET scan?\nChoices:\n(A) Brain tumor \n(B) Breast cancer \n(C) Adrenal gland tumor \n(D) Lung cancer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.265625, "loss": 4.274719715118408 }, { "id": "identity_319046", "image": "CLEVR-Math/images/CLEVR_train_040817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all small cylinders. How many cylinders remain?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.875, "loss": 2.3520843982696533 }, { "id": "identity_315717", "image": "Super-CLEVR/images/superCLEVR_new_001751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the purple thing and the big red object have the same shape?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.078125, "loss": 5.6253461837768555 }, { "id": "identity_203179", "image": "VQA2.0/images/37863.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any standing lamps visible in the outdoor area?" }, { "from": "gpt", "value": "The answer is Yes, there are standing lamps" } ], "clip_score": 30.796875, "loss": 2.4796197414398193 }, { "id": "identity_41738", "image": "FigureQA/images/14708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the Tomato section to the Khaki section?" }, { "from": "gpt", "value": "The answer is 7:1" } ], "clip_score": 37.46875, "loss": 2.051577091217041 }, { "id": "identity_113792", "image": "PMC-VQA/images/PMC4604167_pone.0140476.g005_432976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.28125, "loss": 2.285353183746338 }, { "id": "identity_283220", "image": "DocVQA/images/lpbm0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of typewriter is favored?" }, { "from": "gpt", "value": "The answer is electric" } ], "clip_score": 21.15625, "loss": 3.043184518814087 }, { "id": "identity_116097", "image": "PMC-VQA/images/PMC6563475_cne24657-fig-0009_493022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label written in the top left corner of the image?" }, { "from": "gpt", "value": "The answer is TRH NucBlue" } ], "clip_score": 28.453125, "loss": 2.3478810787200928 }, { "id": "identity_191234", "image": "VQA2.0/images/211945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the dog's goggles?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 18.15625, "loss": 3.1210103034973145 }, { "id": "identity_291423", "image": "CLEVR-Math/images/CLEVR_train_024239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 8 rubber cylinders, how many rubber cylinders remain?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 20.484375, "loss": 1.9575347900390625 }, { "id": "identity_336637", "image": "TabMWP/images/4872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the given schedule, when does the reptile show start?\nChoices:\n(A) 11:45 A.M.\n(B) 1:20 P.M.\n(C) 4:25 P.M.\n(D) 1:45 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.890625, "loss": 3.3626182079315186 }, { "id": "identity_249832", "image": "PMC-VQA/images/PMC2562460_F6_28478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structures labeled \"dvm,\" \"lm,\" and \"cm\" in the image, and explain their functional roles in the organism" }, { "from": "gpt", "value": "The answer is Diagonal muscles, longitudinal muscles, circular muscles" } ], "clip_score": 28.34375, "loss": 3.18951153755188 }, { "id": "identity_142320", "image": "DocVQA/images/yscw0217_188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean body mass index (BMI) of the participants?" }, { "from": "gpt", "value": "The answer is 26.1 kg/m²" } ], "clip_score": 23.8125, "loss": 1.5892034769058228 }, { "id": "identity_308346", "image": "TextVQA/images/bee12950e4213f2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When was this made?" }, { "from": "gpt", "value": "The answer is january 1984" } ], "clip_score": 15.6953125, "loss": 1.9059947729110718 }, { "id": "identity_109200", "image": "VQA-AS/images/7403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the person on the right wearing?" }, { "from": "gpt", "value": "The answer is black shoes" } ], "clip_score": 18.671875, "loss": 2.4998092651367188 }, { "id": "identity_1058", "image": "PMC-VQA/images/PMC6330477_Fig6_421811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the length of the spinal canal affected in the image? \nChoices:\n(A) T2-T5 \n(B) T6-T12 \n(C) T2-L2 \n(D) T6-L2. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.828125, "loss": 4.26725959777832 }, { "id": "identity_167353", "image": "IconQA/images/4930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers have a visible yellow center?" }, { "from": "gpt", "value": "The answer is 4 flowers" } ], "clip_score": 27.28125, "loss": 2.8839848041534424 }, { "id": "identity_100634", "image": "TabMWP/images/33106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does the book signed by a famous author cost compared to the photograph signed by a famous race car driver?" }, { "from": "gpt", "value": "The answer is $1,074" } ], "clip_score": 20.9375, "loss": 2.4548988342285156 }, { "id": "identity_38498", "image": "DocVQA/images/sylm0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Permit No given in this document?" }, { "from": "gpt", "value": "The answer is 75" } ], "clip_score": 19.46875, "loss": 3.5749142169952393 }, { "id": "identity_286187", "image": "ScienceQA/images/3320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Rhode Island?\n(A) Providence\n(B) Milwaukee\n(C) Raleigh\n(D) Boston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.75, "loss": 3.984213352203369 }, { "id": "identity_162096", "image": "GeoQA+/images/2502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the side length of square 2 is a and the side length of square 3 is b, what is the value of a^2 + b^2?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.234375, "loss": 2.373187780380249 }, { "id": "identity_49718", "image": "FigureQA/images/36621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sum of Red and Cornflower segments greater than 50%?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.328125, "loss": 3.480804443359375 }, { "id": "identity_224721", "image": "TabMWP/images/13947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of offices in the building?" }, { "from": "gpt", "value": "The answer is 1622" } ], "clip_score": 24.515625, "loss": 2.515596628189087 }, { "id": "identity_73710", "image": "ScienceQA/images/414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many specific humidity levels are depicted in the image?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 22.390625, "loss": 2.4741554260253906 }, { "id": "identity_68905", "image": "AI2D/images/1469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the underground pool of molten rock?" }, { "from": "gpt", "value": "The answer is Magma Chamber" } ], "clip_score": 22.765625, "loss": 2.4429855346679688 }, { "id": "identity_218384", "image": "TextVQA/images/0f2c104be4ff3bd4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of pizza is offered at the pizza station?" }, { "from": "gpt", "value": "The answer is Little Caesars" } ], "clip_score": 22.359375, "loss": 2.263533115386963 }, { "id": "identity_166485", "image": "TabMWP/images/31497.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 2 drawers are removed, how many hammers will be left?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 33.71875, "loss": 2.064476251602173 }, { "id": "identity_213603", "image": "PMC-VQA/images/PMC4896236_Fig1_38560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the enzyme's surface representation in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 26.5625, "loss": 4.9098286628723145 }, { "id": "identity_282931", "image": "A-OKVQA/images/518866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is used to pull this boat: twine, yarn, cables, or string?\nChoices:\n(A) twine\n(B) yarn\n(C) cables\n(D) string" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.453125, "loss": 3.7980422973632812 }, { "id": "identity_183156", "image": "A-OKVQA/images/351367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red triangular markers can be seen on the snow?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.546875, "loss": 2.5046517848968506 }, { "id": "identity_84942", "image": "GeoQA+/images/756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ∠ADC = 30° and OD is the radius of the circle with center O, what is the measure of ∠CDO?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 36.28125, "loss": 1.7991694211959839 }, { "id": "identity_5434", "image": "VQA2.0/images/293233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the batter?" }, { "from": "gpt", "value": "The answer is boggs" } ], "clip_score": 24.0625, "loss": 4.309846878051758 }, { "id": "identity_233964", "image": "DocVQA/images/fmlh0077_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Program Number and the Tape Due date?" }, { "from": "gpt", "value": "The answer is Program Number: 600900, Tape Due: 5/20/96" } ], "clip_score": 23.9375, "loss": 1.7191606760025024 }, { "id": "identity_63350", "image": "ScienceQA/images/3301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the basketball jersey?" }, { "from": "gpt", "value": "The answer is Adidas" } ], "clip_score": 27.390625, "loss": 3.192342758178711 }, { "id": "identity_124358", "image": "Super-CLEVR/images/superCLEVR_new_000968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more airplanes than motorcycles in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.671875, "loss": 3.123115301132202 }, { "id": "identity_215698", "image": "PMC-VQA/images/PMC4700043_Fig4_458890.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What strength of magnetic field was used in this MRI scan?" }, { "from": "gpt", "value": "The answer is 7 Tesla" } ], "clip_score": 31.4375, "loss": 1.317936897277832 }, { "id": "identity_169459", "image": "IconQA/images/38588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 2 rectangles, how many will remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.546875, "loss": 2.2923598289489746 }, { "id": "identity_92615", "image": "TabMWP/images/17019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a price of $345, by how many units does the quantity supplied exceed the quantity demanded?" }, { "from": "gpt", "value": "The answer is 6,300" } ], "clip_score": 32.5, "loss": 2.000913143157959 }, { "id": "identity_316145", "image": "IconQA/images/88603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.390625, "loss": 4.469418525695801 }, { "id": "identity_17152", "image": "PMC-VQA/images/PMC8670330_F3_138900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image shows the topography map centered on the crater? \nChoices:\n(A) Left image \n(B) Middle image \n(C) Right image \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.59375, "loss": 3.838737964630127 }, { "id": "identity_225871", "image": "FigureQA/images/33751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what percentage is Crimson greater than Coral?" }, { "from": "gpt", "value": "The answer is 0%" } ], "clip_score": 28.71875, "loss": 1.9537124633789062 }, { "id": "identity_240949", "image": "IconQA/images/64610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the cactus is 2 cubes tall, how many cubes tall is the white rectangle?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 27.953125, "loss": 2.4881134033203125 }, { "id": "identity_175723", "image": "IconQA/images/18857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are there between the second star and the second rocket?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.5625, "loss": 2.3484673500061035 }, { "id": "identity_41221", "image": "FigureQA/images/63796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Orange Red line consistently above the Peru line throughout the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.421875, "loss": 3.5980255603790283 }, { "id": "identity_324505", "image": "VQA2.0/images/521884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How tall are these?" }, { "from": "gpt", "value": "The answer is 2 stories" } ], "clip_score": 15.046875, "loss": 2.53786039352417 }, { "id": "identity_56672", "image": "FigureQA/images/80360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Firebrick at x-axis label 50?" }, { "from": "gpt", "value": "The answer is 82" } ], "clip_score": 24.453125, "loss": 3.1909432411193848 }, { "id": "identity_293416", "image": "PMC-VQA/images/PMC8008815_f1_406324.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After 5 weeks, what does scan 3 reveal?\n(A) Presence of an atrophic area in the right frontal lobe.\n(B) Presence of an atrophic area in the left occipital lobe.\n(C) Presence of an atrophic area in the right occipital lobe.\n(D) Presence of an atrophic area in the left frontal lobe." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.1875, "loss": 3.5296950340270996 }, { "id": "identity_14208", "image": "VQA2.0/images/569816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the yellow objects?" }, { "from": "gpt", "value": "The answer is machines" } ], "clip_score": 16.984375, "loss": 3.84820556640625 }, { "id": "identity_118056", "image": "TextVQA/images/fbedae21670c9123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the audience seating?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 10.8984375, "loss": 3.476527690887451 }, { "id": "identity_148136", "image": "DocVQA/images/tshd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned in the letter?" }, { "from": "gpt", "value": "The answer is October 28, 1975" } ], "clip_score": 22.421875, "loss": 1.7577683925628662 }, { "id": "identity_316728", "image": "IconQA/images/77401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes have this color?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.390625, "loss": 2.2279911041259766 }, { "id": "identity_271190", "image": "PMC-VQA/images/PMC6968987_Fig1_102566.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area in the image displays hyperintense signal, with the options being:\n(A) Anterior limb of internal capsule\n(B) Posterior limb of internal capsule\n(C) Corpus callosum\n(D) Thalamus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.859375, "loss": 4.2843918800354 }, { "id": "identity_51082", "image": "DocVQA/images/jzhd0227_28.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the zip code of the location where Robert E. Shank, M.D. works?" }, { "from": "gpt", "value": "The answer is 63110" } ], "clip_score": 17.21875, "loss": 2.318350076675415 }, { "id": "identity_282676", "image": "IconQA/images/83363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which activity did Jim do first after lunch on Wednesday, out of writing a story, watching a movie, and going shopping?\nChoices:\n(A) wrote a story\n(B) watched a movie\n(C) went shopping" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.78125, "loss": 3.7856223583221436 }, { "id": "identity_220467", "image": "A-OKVQA/images/360877.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bunches of bananas are hanging from the roof?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.90625, "loss": 2.3619325160980225 }, { "id": "identity_224900", "image": "TextVQA/images/043a59888f5bcb7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of cleaning products are visible on the shelves, and what are their brands?" }, { "from": "gpt", "value": "The answer is 3 types; Sun, Ajax, and others" } ], "clip_score": 32.65625, "loss": 2.8572323322296143 }, { "id": "identity_287244", "image": "GeoQA+/images/3669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, circle O is the circumcircle of right triangle ABC. Line OE is perpendicular to AB and intersects circle O at point E, with D as the foot of the perpendicular. The extensions of AE and CB intersect at point F. If OD = 3 and AB = 8, what is the length of FC?\nChoices:\n(A) 10\n(B) 8\n(C) 6\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.25, "loss": 2.4464898109436035 }, { "id": "identity_325298", "image": "A-OKVQA/images/20774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What problem are the individuals on the right experiencing?\nChoices:\n(A) getting soaked\n(B) getting thirsty\n(C) getting tired\n(D) getting sunburned" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.96875, "loss": 4.6457438468933105 }, { "id": "identity_53310", "image": "DocVQA/images/yzbd0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last event scheduled for the fourth day?" }, { "from": "gpt", "value": "The answer is Preparation of the final report" } ], "clip_score": 22.625, "loss": 2.444472312927246 }, { "id": "identity_106664", "image": "TextVQA/images/99a929c33603be8f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color theme of the perfume display?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 32.96875, "loss": 3.220595598220825 }, { "id": "identity_239632", "image": "TabMWP/images/11800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of new customers from Wednesday to Saturday, and how does it compare to the number of new customers on Thursday?" }, { "from": "gpt", "value": "The answer is The average is higher than Thursday's new customers" } ], "clip_score": 28.515625, "loss": 2.8518176078796387 }, { "id": "identity_96007", "image": "TabMWP/images/1071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days did the waitress work exactly 8 hours?" }, { "from": "gpt", "value": "The answer is 5 days" } ], "clip_score": 29.671875, "loss": 2.365072011947632 }, { "id": "identity_273519", "image": "AI2D/images/3122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the letter D in the given options? Is it:\n(A) The Motor\n(B) The Power Cord\n(C) The Switch\n(D) The Brushes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5625, "loss": 4.315893173217773 }, { "id": "identity_205146", "image": "PMC-VQA/images/PMC5265979_F3_119160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.46875, "loss": 2.7322041988372803 }, { "id": "identity_53911", "image": "FigureQA/images/48888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of the Firebrick color in the pie chart?" }, { "from": "gpt", "value": "The answer is Firebrick is approximately 85%" } ], "clip_score": 38.71875, "loss": 2.387385368347168 }, { "id": "identity_184026", "image": "PMC-VQA/images/PMC3483267_pone-0048512-g005_162432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique might have been used to produce the colors in this image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) stain" } ], "clip_score": 27.421875, "loss": 1.0865222215652466 }, { "id": "identity_53952", "image": "DocVQA/images/tkbv0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many age groups are represented in the graph?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.96875, "loss": 2.6724472045898438 }, { "id": "identity_259299", "image": "PMC-VQA/images/PMC3777960_pone-0074753-g003_232606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the areas of necrosis in the figure with the treated and untreated tumors. Which group shows a higher percentage of necrosis, and by how much?" }, { "from": "gpt", "value": "The answer is Untreated tumors show a higher percentage of necrosis by 20%" } ], "clip_score": 24.109375, "loss": 1.2488664388656616 }, { "id": "identity_9742", "image": "DVQA/images/bar_train_00137903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.03125, "loss": 4.344825744628906 }, { "id": "identity_29734", "image": "A-OKVQA/images/102655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the white squares on the stadium seats?\nChoices:\n(A) cameras\n(B) seat numbers\n(C) fans' names\n(D) decoration" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.390625, "loss": 4.552430629730225 }, { "id": "identity_228639", "image": "UniGeo/images/338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB、CD是⊙O的两条直径,且AB⊥CD,⁀{CE}=\\frac{1}{2}⁀{EB},P为直径CD上一动点,若⊙O的直径AB=2,求△PEB周长的最小值时,当P在CD上何处时,△PEB的面积最大?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 31.015625, "loss": 2.58158016204834 }, { "id": "identity_141284", "image": "DVQA/images/bar_train_00143393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the bar labeled \"deck\"?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.75, "loss": 2.6240904331207275 }, { "id": "identity_133095", "image": "Super-CLEVR/images/superCLEVR_new_000350.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What musical instrument is present in the image?" }, { "from": "gpt", "value": "The answer is Trumpet" } ], "clip_score": 13.21875, "loss": 2.4714853763580322 }, { "id": "identity_167104", "image": "IconQA/images/46988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sides does the base of this shape have?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 31.109375, "loss": 2.3925015926361084 }, { "id": "identity_169018", "image": "TabMWP/images/7311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of miles Dylan drove from Thursday to Monday?" }, { "from": "gpt", "value": "The answer is 36 miles" } ], "clip_score": 39.03125, "loss": 1.9277278184890747 }, { "id": "identity_16424", "image": "CLEVR-Math/images/CLEVR_train_047887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all small purple rubber cylinders. Subtract all large purple matte cubes. How many objects are left?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 26.453125, "loss": 2.3654839992523193 }, { "id": "identity_224304", "image": "IconQA/images/33465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you cut the twig into three equal parts, what would be the length of each part to the nearest inch?" }, { "from": "gpt", "value": "The answer is 1 inch" } ], "clip_score": 37.6875, "loss": 1.9098080396652222 }, { "id": "identity_186105", "image": "VQA2.0/images/475076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural style does the building in the background represent?" }, { "from": "gpt", "value": "The answer is Indian architecture" } ], "clip_score": 22.3125, "loss": 3.277336835861206 }, { "id": "identity_23814", "image": "Geometry3K/images/1165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If m \\angle Z X W = x - 11 and m \\angle W Z X = x - 9, find m \\angle Z X Y.\nChoices:\n(A) 24\n(B) 46\n(C) 48\n(D) 96" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.84375, "loss": 2.4384803771972656 }, { "id": "identity_36404", "image": "TabMWP/images/9083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does pasta with tomato sauce cost than pasta with clams?" }, { "from": "gpt", "value": "The answer is 1.60" } ], "clip_score": 32.625, "loss": 2.0600783824920654 }, { "id": "identity_17206", "image": "DVQA/images/bar_train_00082082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first element from the left in each stack of bars?" }, { "from": "gpt", "value": "The answer is check" } ], "clip_score": 20.3125, "loss": 3.8587021827697754 }, { "id": "identity_295642", "image": "IconQA/images/66241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of the shape that is colored blue? Is it (A) 7/11, (B) 1/2, (C) 3/7, or (D) 4/5?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 3.2001709938049316 }, { "id": "identity_249353", "image": "TextVQA/images/554140b113c2f1c9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books with a tree on them are visible in the image, and what is the name of the book with the most copies?" }, { "from": "gpt", "value": "The answer is 5, Jovem Senador" } ], "clip_score": 19.625, "loss": 2.481569528579712 }, { "id": "identity_135308", "image": "FigureQA/images/77539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest starting point on the y-axis?" }, { "from": "gpt", "value": "The answer is Dark Gold" } ], "clip_score": 26.203125, "loss": 2.9075927734375 }, { "id": "identity_17527", "image": "UniGeo/images/2051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示的图案是一个轴对称图形,直线l是它的一条对称轴,如果最大圆的半径为2,那么阴影部分面积是()\nChoices:\n(A) π\n(B) 2π\n(C) 3π\n(D) 4π" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.9296875, "loss": 2.921912908554077 }, { "id": "identity_74247", "image": "AI2D/images/727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Monarch larva shed its skin before pupating?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.75, "loss": 1.804669737815857 }, { "id": "identity_264165", "image": "FigureQA/images/39544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.96875, "loss": 4.240511417388916 }, { "id": "identity_86402", "image": "UniGeo/images/698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side BC in right triangle ABC?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 36.1875, "loss": 2.576017379760742 }, { "id": "identity_13853", "image": "UniGeo/images/2907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A、B、C三点都在⊙O上,若∠C=34°,则∠AOB的度数是()\nChoices:\n(A) 68°\n(B) 34°\n(C) 134°\n(D) 168°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.8125, "loss": 2.591479778289795 }, { "id": "identity_323741", "image": "PMC-VQA/images/PMC4122023_fig13_311483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located in the brain?\nChoices:\n(A) Left temporal gyrus\n(B) Right frontal lobe\n(C) Left frontal lobe\n(D) Basal ganglia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.703125, "loss": 3.791419267654419 }, { "id": "identity_204771", "image": "TextVQA/images/0d4f6be234078add.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beer is shown in the image?" }, { "from": "gpt", "value": "The answer is Doppelbock" } ], "clip_score": 26.421875, "loss": 2.9623661041259766 }, { "id": "identity_88398", "image": "UniGeo/images/2743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠OBC = 50°, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 36.84375, "loss": 1.7724272012710571 }, { "id": "identity_187612", "image": "PMC-VQA/images/PMC8126415_DEV194597F1_443014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the images in the top and bottom rows from the same developmental stage?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.875, "loss": 2.912444591522217 }, { "id": "identity_155886", "image": "AI2D/images/166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Earth orbit the Sun in the diagram?" }, { "from": "gpt", "value": "The answer is One full orbit" } ], "clip_score": 32.125, "loss": 3.007053852081299 }, { "id": "identity_84914", "image": "UniGeo/images/1922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB = 10cm and NB = 2cm, what is the length of AN?" }, { "from": "gpt", "value": "The answer is AN = 8cm" } ], "clip_score": 26.5, "loss": 1.7248555421829224 }, { "id": "identity_79969", "image": "UniGeo/images/2407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the line segment AD?" }, { "from": "gpt", "value": "The answer is √416 or approximately 20.4" } ], "clip_score": 25.265625, "loss": 2.5321455001831055 }, { "id": "identity_61505", "image": "DocVQA/images/yyvv0228_32.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much was spent on Pediatric Nutrition?" }, { "from": "gpt", "value": "The answer is $14,705.00" } ], "clip_score": 22.75, "loss": 1.6383172273635864 }, { "id": "identity_262333", "image": "Geometry3K/images/662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle J.\nChoices:\n(A) 52\n(B) 62\n(C) 72\n(D) 150" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.984375, "loss": 2.61388897895813 }, { "id": "identity_114375", "image": "TextVQA/images/513e609b75c5dd65.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more people on the sand or in the water?" }, { "from": "gpt", "value": "The answer is On the sand" } ], "clip_score": 19.703125, "loss": 2.6295578479766846 }, { "id": "identity_56419", "image": "FigureQA/images/71519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Dark Turquoise between x-axis labels 0 and 100?" }, { "from": "gpt", "value": "The answer is 75" } ], "clip_score": 29.3125, "loss": 2.2141213417053223 }, { "id": "identity_202729", "image": "PMC-VQA/images/PMC8036118_F3_413541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any sign of bone fracture in the knee image?" }, { "from": "gpt", "value": "The answer is No bone fracture visible" } ], "clip_score": 30.140625, "loss": 2.852592945098877 }, { "id": "identity_201453", "image": "PMC-VQA/images/PMC7923829_Fig8_381636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure indicated by label 1a in the figure?" }, { "from": "gpt", "value": "The answer is Superior vena cava" } ], "clip_score": 26.296875, "loss": 2.316490888595581 }, { "id": "identity_2046", "image": "A-OKVQA/images/260182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what setting is this street scene?\nChoices:\n(A) rural\n(B) urban\n(C) farm\n(D) suburban" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.5078125, "loss": 4.766650199890137 }, { "id": "identity_231771", "image": "PlotQA/images/127911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country had the highest percentage increase in the amount of ICT goods exported from 2000 to 2002?" }, { "from": "gpt", "value": "The answer is Lithuania" } ], "clip_score": 31.171875, "loss": 2.499976634979248 }, { "id": "identity_23477", "image": "A-OKVQA/images/127074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the bag on the grass in front of this man?\nChoices:\n(A) duffle bag\n(B) sports bag\n(C) drawstring bag\n(D) handbag" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.453125, "loss": 4.2408952713012695 }, { "id": "identity_119394", "image": "Super-CLEVR/images/superCLEVR_new_000885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more yellow vehicles than green vehicles in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.453125, "loss": 3.3339850902557373 }, { "id": "identity_317184", "image": "ScienceQA/images/13903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An international group consists of members from () who ().\nChoices:\n(A) different countries . . . declare war on other countries\n(B) different countries . . . work together for a shared purpose\n(C) the same country . . . work together for a shared purpose\n(D) the same country . . . declare war on other countries" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.375, "loss": 3.8134961128234863 }, { "id": "identity_8339", "image": "CLEVR-Math/images/CLEVR_train_012685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 7 blocks. How many blocks exist?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 17.84375, "loss": 2.1518194675445557 }, { "id": "identity_151275", "image": "ScienceQA/images/4318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a mint mark on the penny?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 34.09375, "loss": 3.153001070022583 }, { "id": "identity_98045", "image": "TabMWP/images/21618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average score of the three friends?" }, { "from": "gpt", "value": "The answer is 647.67" } ], "clip_score": 29.0625, "loss": 1.9727627038955688 }, { "id": "identity_334743", "image": "AI2D/images/303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the killer whale represent in the food web?\nChoices:\n(A) Prey\n(B) Producer\n(C) Predator\n(D) Parasite" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.6875, "loss": 4.311652183532715 }, { "id": "identity_310175", "image": "ScienceQA/images/4591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which place is highlighted?\nChoices:\n(A) Tuvalu\n(B) Fiji\n(C) Tonga\n(D) Kiribati" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.640625, "loss": 4.061017036437988 }, { "id": "identity_13548", "image": "UniGeo/images/1055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD内接于⊙O,E为AD延长线上一点,若∠CDE=80°,则∠B等于()\nChoices:\n(A) 60°\n(B) 70°\n(C) 80°\n(D) 90°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.078125, "loss": 2.675945281982422 }, { "id": "identity_225665", "image": "FigureQA/images/86224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis value does the Coral line first surpass the Dark Turquoise line?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 29.6875, "loss": 2.0305259227752686 }, { "id": "identity_71617", "image": "AI2D/images/109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After how many instars does the locust reach the hopper development stage?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.84375, "loss": 2.2619917392730713 }, { "id": "identity_13865", "image": "PMC-VQA/images/PMC7803889_F2_341306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the three measurements taken from the axial T2-weighted image? \nChoices:\n(A) Lumen area, front-back diameter, transversal diameter \n(B) Lumen area, left-right diameter, oblique diameter \n(C) Front-back diameter, left-right diameter, transversal diameter \n(D) Lumen area, left-right diameter, transversal diameter " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.78125, "loss": 3.137403964996338 }, { "id": "identity_110576", "image": "Super-CLEVR/images/superCLEVR_new_001206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the aeroplane that is behind the yellow bicycle and in front of the red motorbike?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 29.625, "loss": 3.4350078105926514 }, { "id": "identity_173246", "image": "IconQA/images/95762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many orange sections are there on the football?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.015625, "loss": 2.2616236209869385 }, { "id": "identity_125578", "image": "PMC-VQA/images/PMC7346308_FIG3_208864.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of additional imaging is indicated on the left side of the image?" }, { "from": "gpt", "value": "The answer is PET (Positron Emission Tomography)" } ], "clip_score": 30.96875, "loss": 1.4069361686706543 }, { "id": "identity_244776", "image": "VizWiz/images/VizWiz_train_00023763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct items are visible on the table, and what are they?" }, { "from": "gpt", "value": "The answer is Seven items: coffee mug, tissue paper, letters, pen, figurine, coaster, stapler" } ], "clip_score": 27.828125, "loss": 2.6519775390625 }, { "id": "identity_25021", "image": "ScienceQA/images/17019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these magnets attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 3.6400883197784424 }, { "id": "identity_337837", "image": "PMC-VQA/images/PMC9386117_F1_374849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of surgical procedure was done based on the caption?\nChoices:\n(A) Posterior surgery\n(B) Anterior surgery\n(C) Laminectomy\n(D) Discectomy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.21875, "loss": 4.0373711585998535 }, { "id": "identity_308018", "image": "ChartQA/images/multi_col_80291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum total of all the countries in 2015?" }, { "from": "gpt", "value": "The answer is 52.9" } ], "clip_score": 23.140625, "loss": 2.3335800170898438 }, { "id": "identity_305393", "image": "ScienceQA/images/17522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which solution has a higher concentration of these particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution B\n(C) Solution A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.421875, "loss": 3.089845657348633 }, { "id": "identity_61160", "image": "FigureQA/images/40536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart represent an equal distribution of Crimson and Blue?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 39.96875, "loss": 3.0576438903808594 }, { "id": "identity_235519", "image": "A-OKVQA/images/256276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What evidence in the image suggests that the bear is being fed by the woman, and what other elements in the image support this conclusion?" }, { "from": "gpt", "value": "The answer is The woman is feeding the bear, evidenced by her hand gesture" } ], "clip_score": 29.296875, "loss": 2.972531318664551 }, { "id": "identity_18267", "image": "UniGeo/images/1202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O中,直径AB与弦CD相交于点E,连接BC,AD,过点C的切线与AB的延长线交于点F,若∠D=65°,则∠F的度数等于()\nChoices:\n(A) 30°\n(B) 35°\n(C) 40°\n(D) 45°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.875, "loss": 2.733621597290039 }, { "id": "identity_304350", "image": "TQA/images/abc_question_images/human_system_eye_16145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates this?\nChoices:\n(A) P\n(B) E\n(C) U\n(D) K" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.796875, "loss": 4.046969890594482 }, { "id": "identity_179515", "image": "TabMWP/images/36034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a recipe calls for 4 pounds of tomatoes and you can only buy roma or cherry tomatoes, which option is cheaper and by how much?" }, { "from": "gpt", "value": "The answer is Both options cost the same, $12" } ], "clip_score": 34.34375, "loss": 2.881124973297119 }, { "id": "identity_149322", "image": "DocVQA/images/sykk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the latest month and year mentioned for 'Agricultural Age'?" }, { "from": "gpt", "value": "The answer is May 76" } ], "clip_score": 24.46875, "loss": 2.719202995300293 }, { "id": "identity_289641", "image": "TabMWP/images/13693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money required by Ivan to purchase a thin green highlighter, a bright yellow highlighter, and a silver pen?" }, { "from": "gpt", "value": "The answer is 0.19" } ], "clip_score": 33.96875, "loss": 2.3939013481140137 }, { "id": "identity_295338", "image": "VizWiz/images/VizWiz_train_00003865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what is shown in this image?" }, { "from": "gpt", "value": "The answer is medical equipment" } ], "clip_score": 29.71875, "loss": 4.0581278800964355 }, { "id": "identity_117253", "image": "VizWiz/images/VizWiz_train_00017344.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many app icons have a predominantly blue color?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.21875, "loss": 2.4304213523864746 }, { "id": "identity_39583", "image": "DVQA/images/bar_train_00172718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined value of saline and chief in death?" }, { "from": "gpt", "value": "The answer is Approximately 12" } ], "clip_score": 23.46875, "loss": 3.0064191818237305 }, { "id": "identity_175924", "image": "IconQA/images/100617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the squares on the frame have stars?" }, { "from": "gpt", "value": "The answer is 4/8 or 1/2" } ], "clip_score": 33.65625, "loss": 1.4422261714935303 }, { "id": "identity_104009", "image": "VQA-AS/images/13004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What piece of furniture is to the left of the couch?" }, { "from": "gpt", "value": "The answer is An armchair" } ], "clip_score": 26.859375, "loss": 3.600881814956665 }, { "id": "identity_129930", "image": "Super-CLEVR/images/superCLEVR_new_000770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of wheels visible in the image?" }, { "from": "gpt", "value": "The answer is Twenty" } ], "clip_score": 16.765625, "loss": 3.590057611465454 }, { "id": "identity_78502", "image": "TQA/images/question_images/geologic_time_6919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many geological periods are listed within the Phanerozoic Eon?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 26.6875, "loss": 2.5167133808135986 }, { "id": "identity_57192", "image": "DVQA/images/bar_train_00007809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more units of 'ice' were sold compared to 'desk' in the 'island' store?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 39.21875, "loss": 2.2350285053253174 }, { "id": "identity_284603", "image": "DVQA/images/bar_train_00058330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is represented by the palegoldenrod color?" }, { "from": "gpt", "value": "The answer is brain" } ], "clip_score": 22.890625, "loss": 4.956675052642822 }, { "id": "identity_175767", "image": "IconQA/images/35913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides for all the shapes combined?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 30.296875, "loss": 2.027895212173462 }, { "id": "identity_306397", "image": "TabMWP/images/1364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For a project, Victor counted the number of classrooms in each school in the city. What is the highest number of classrooms?" }, { "from": "gpt", "value": "The answer is 68" } ], "clip_score": 29.640625, "loss": 2.7281107902526855 }, { "id": "identity_103636", "image": "VQA2.0/images/284667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear do the horses have on?" }, { "from": "gpt", "value": "The answer is Horseshoes" } ], "clip_score": 22.671875, "loss": 1.7429319620132446 }, { "id": "identity_100360", "image": "TabMWP/images/34195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern continues, what would be the y value when x is 21?" }, { "from": "gpt", "value": "The answer is -3" } ], "clip_score": 28.96875, "loss": 3.4282681941986084 }, { "id": "identity_272925", "image": "AI2D/images/2007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which of the following sources do deer obtain energy?\nChoices:\n(A) Snake\n(B) Plants\n(C) Beetle\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.03125, "loss": 4.017544746398926 }, { "id": "identity_17019", "image": "PlotQA/images/1205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and the lowest percentage of female repeaters ?" }, { "from": "gpt", "value": "The answer is 0.23767995834351008" } ], "clip_score": 24.640625, "loss": 2.758237600326538 }, { "id": "identity_303020", "image": "DocVQA/images/zjkp0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the invoice?" }, { "from": "gpt", "value": "The answer is 8704" } ], "clip_score": 20.65625, "loss": 2.433833360671997 }, { "id": "identity_239271", "image": "TextVQA/images/fe89ea91f8cb18f8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the theater production advertised on the large sign at the bottom of the image, and which company is advertising on the blue sign written vertically?" }, { "from": "gpt", "value": "The answer is The Color Purple, Argent" } ], "clip_score": 31.734375, "loss": 4.48829984664917 }, { "id": "identity_145880", "image": "FigureQA/images/40801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have values less than 10?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.484375, "loss": 2.673964738845825 }, { "id": "identity_99950", "image": "TabMWP/images/36239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the greatest decrease in winning time from one year to the next?" }, { "from": "gpt", "value": "The answer is 4 minutes (from 2009 to 2010)" } ], "clip_score": 30.65625, "loss": 1.1289045810699463 }, { "id": "identity_56911", "image": "FigureQA/images/12624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Firebrick section represent more than a quarter of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 34.40625, "loss": 3.0396063327789307 }, { "id": "identity_245149", "image": "A-OKVQA/images/174896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the items on the table and categorize them into food, drink, and utensils. Then, determine which category has the most items and how many items are in that category" }, { "from": "gpt", "value": "The answer is Drink and utensils, 5 items each" } ], "clip_score": 21.96875, "loss": 3.1842732429504395 }, { "id": "identity_154091", "image": "ScienceQA/images/11541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the body of water located to the north of the highlighted country?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 24.125, "loss": 2.6391427516937256 }, { "id": "identity_30045", "image": "TabMWP/images/6125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The teachers at a middle school counted how many computers they had in their classrooms. What is the mode of the numbers?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.96875, "loss": 2.69775652885437 }, { "id": "identity_120131", "image": "VQA-AS/images/9817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tree is shown in the image?" }, { "from": "gpt", "value": "The answer is Deciduous tree" } ], "clip_score": 24.0625, "loss": 2.8856959342956543 }, { "id": "identity_78671", "image": "TQA/images/question_images/human_system_circulatory_3647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the blood vessel that carries oxygenated blood from the lungs to the left side of the heart?" }, { "from": "gpt", "value": "The answer is left pulmonary veins" } ], "clip_score": 30.96875, "loss": 2.0619921684265137 }, { "id": "identity_8905", "image": "IconQA/images/104464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many squares are there?\nChoices:\n(A) 4\n(B) 2\n(C) 3\n(D) 1\n(E) 5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.828125, "loss": 3.5383694171905518 }, { "id": "identity_59564", "image": "FigureQA/images/29307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which section is larger, Orchid or Firebrick?" }, { "from": "gpt", "value": "The answer is Orchid" } ], "clip_score": 28.109375, "loss": 2.215747833251953 }, { "id": "identity_319750", "image": "UniGeo/images/1423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that the radius of circle O is 5, AB is the diameter of circle O, and D is a point on the extension of AB, with DC being the tangent to circle O and ∠CAB being 30°, what is the length of BD?\n\nChoices:\n(A) 10\n(B) 5√{3}\n(C) 5\n(D) \\frac{5}{2}√{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.28125, "loss": 2.7224090099334717 }, { "id": "identity_235851", "image": "MapQA/images/map_12098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the second highest value in the USA, and what is that value?" }, { "from": "gpt", "value": "The answer is Illinois, 52401-63005" } ], "clip_score": 32.375, "loss": 2.949357032775879 }, { "id": "identity_183689", "image": "TextVQA/images/36d8f0024be1a1a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label on the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is BIBLIOGRAPHIES" } ], "clip_score": 22.78125, "loss": 2.565595865249634 }, { "id": "identity_217011", "image": "PMC-VQA/images/PMC6819370_F2_57889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any lymphadenopathy be identified in the mediastinum?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.1875, "loss": 2.7435436248779297 }, { "id": "identity_267790", "image": "PlotQA/images/107024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars, starting from the left, is labeled as the 5th group?" }, { "from": "gpt", "value": "The answer is 2009" } ], "clip_score": 19.234375, "loss": 2.110811471939087 }, { "id": "identity_188243", "image": "PMC-VQA/images/PMC9058312_tca14396-fig-0001_269664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the opacity observed in the right upper lobe of the lung?" }, { "from": "gpt", "value": "The answer is Increased opacity in the right upper lobe" } ], "clip_score": 30.546875, "loss": 2.10332989692688 }, { "id": "identity_246328", "image": "AI2D/images/986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which figure represents the filamentous green algae that is known for its spiral chloroplasts and is located in the second row, first column?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.375, "loss": 2.5530757904052734 }, { "id": "identity_59758", "image": "DocVQA/images/gzmd0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the authorization request?" }, { "from": "gpt", "value": "The answer is Nasal Airway Inadequacy and Olfactory Performance" } ], "clip_score": 21.625, "loss": 3.7441928386688232 }, { "id": "identity_18032", "image": "UniGeo/images/995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,DC是弦,若∠COB=68°,则∠BDC的度数等于()\nChoices:\n(A) 30°\n(B) 32°\n(C) 34°\n(D) 45°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.546875, "loss": 2.5255656242370605 }, { "id": "identity_188074", "image": "VQA2.0/images/29018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cargo is the train carrying?" }, { "from": "gpt", "value": "The answer is Animals and bones" } ], "clip_score": 27.15625, "loss": 3.305281639099121 }, { "id": "identity_214062", "image": "VQA2.0/images/470762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is drawn on the side of the fire hydrant?" }, { "from": "gpt", "value": "The answer is A circle with a dot in the center" } ], "clip_score": 34.78125, "loss": 2.526010513305664 }, { "id": "identity_113477", "image": "PMC-VQA/images/PMC8815176_Fig1_184359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is shown in this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 28.171875, "loss": 2.877039670944214 }, { "id": "identity_188732", "image": "A-OKVQA/images/178635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are observing the skateboarder?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.46875, "loss": 2.523775100708008 }, { "id": "identity_132438", "image": "A-OKVQA/images/488014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the cyclist wearing any protective headgear?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.703125, "loss": 3.1376872062683105 }, { "id": "identity_209130", "image": "PMC-VQA/images/PMC5906535_F6_303486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar length in the image?" }, { "from": "gpt", "value": "The answer is The length indicated by the scale bar" } ], "clip_score": 12.2421875, "loss": 3.6865670680999756 }, { "id": "identity_232323", "image": "VQA2.0/images/479729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bananas are connected to the laptop, and what is the purpose of the setup?" }, { "from": "gpt", "value": "The answer is Five bananas, used as piano keys" } ], "clip_score": 39.59375, "loss": 3.8266143798828125 }, { "id": "identity_24802", "image": "VizWiz/images/VizWiz_train_00008597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What flavor of Marie Callender's dinner is this?" }, { "from": "gpt", "value": "The answer is beef n vegetables" } ], "clip_score": 30.578125, "loss": 3.4596688747406006 }, { "id": "identity_163736", "image": "GeoQA+/images/4900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知AD为△ABC的角平分线,DE∥AB交AC于E,如果∠BAC=70°,那么∠DEC的度数是多少?" }, { "from": "gpt", "value": "The answer is 110°" } ], "clip_score": 32.59375, "loss": 1.720129370689392 }, { "id": "identity_182580", "image": "PMC-VQA/images/PMC8305520_Fig2_17359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible abnormalities in the lung fields?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.46875, "loss": 2.957923412322998 }, { "id": "identity_206689", "image": "PMC-VQA/images/PMC2995804_F2_80277.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the abnormality indicated by the black arrow?" }, { "from": "gpt", "value": "The answer is Subcutaneous emphysema" } ], "clip_score": 24.90625, "loss": 2.8384711742401123 }, { "id": "identity_267484", "image": "ScienceQA/images/3301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this basketball jersey?\nChoices:\n(A) polyester\n(B) porcelain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.375, "loss": 4.63440465927124 }, { "id": "identity_295819", "image": "PlotQA/images/134633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the graph, what is the total amount of aid provided by the EU?" }, { "from": "gpt", "value": "The answer is 300250000" } ], "clip_score": 27.828125, "loss": 2.182391881942749 }, { "id": "identity_297447", "image": "TabMWP/images/35720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on Allie's concern about exceeding her storage limit, she tracked the number of undeleted voicemail messages on her phone daily. What was the difference in the number of voicemail messages stored between Tuesday and Wednesday, as shown in the table?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 43.4375, "loss": 2.8316431045532227 }, { "id": "identity_188447", "image": "A-OKVQA/images/541596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any tools or equipment on the ground near the bus?" }, { "from": "gpt", "value": "The answer is Yes, there are tools and equipment" } ], "clip_score": 22.78125, "loss": 2.3861169815063477 }, { "id": "identity_181152", "image": "TabMWP/images/22121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total travel time from the shopping district to the science museum if one takes the first bus from each location?" }, { "from": "gpt", "value": "The answer is 2 hours" } ], "clip_score": 32.5, "loss": 1.941941499710083 }, { "id": "identity_103712", "image": "VQA-RAD/images/synpic16221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this patient have an enlarged cardiac silhouette?" }, { "from": "gpt", "value": "The answer is No enlarged cardiac silhouette" } ], "clip_score": 26.09375, "loss": 2.2010438442230225 }, { "id": "identity_265279", "image": "FigureQA/images/61606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Firebrick and Medium Orchid intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.03125, "loss": 3.9016952514648438 }, { "id": "identity_56495", "image": "MapQA/images/map_10228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in the Midwest?" }, { "from": "gpt", "value": "The answer is 29.4%-54.8%" } ], "clip_score": 24.28125, "loss": 2.506150960922241 }, { "id": "identity_167728", "image": "IconQA/images/56201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more yellow objects or red objects?" }, { "from": "gpt", "value": "The answer is More yellow objects" } ], "clip_score": 27.1875, "loss": 2.578281879425049 }, { "id": "identity_3361", "image": "A-OKVQA/images/394334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where can you buy these donuts?\nChoices:\n(A) japan\n(B) south korea\n(C) china\n(D) singapore" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.8125, "loss": 3.6735999584198 }, { "id": "identity_232907", "image": "VQA2.0/images/556406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras are there in the image, and how many trees can be seen in the background?" }, { "from": "gpt", "value": "The answer is 8 zebras and 9 trees" } ], "clip_score": 32.34375, "loss": 1.8125865459442139 }, { "id": "identity_242640", "image": "UniGeo/images/2969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,有一块含有30°角的直角三角形板的两个顶点放在直尺的对边上.如果∠2=44°,那么∠1和∠3的度数之和是多少?" }, { "from": "gpt", "value": "The answer is 106°" } ], "clip_score": 25.90625, "loss": 2.1014668941497803 }, { "id": "identity_7349", "image": "DVQA/images/bar_train_00187246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What store does the green color represent?" }, { "from": "gpt", "value": "The answer is gas" } ], "clip_score": 23.6875, "loss": 3.8984501361846924 }, { "id": "identity_45160", "image": "DVQA/images/bar_train_00191385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the highest accuracy on the algorithm tire?" }, { "from": "gpt", "value": "The answer is crowd" } ], "clip_score": 31.15625, "loss": 3.1677756309509277 }, { "id": "identity_335012", "image": "TabMWP/images/16714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at this schedule. Which event finishes at 2.50 P.M.?\nChoices:\n(A) shark lecture\n(B) octopus lecture\n(C) turtle lecture\n(D) seahorse lecture" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.625, "loss": 3.9528567790985107 }, { "id": "identity_258514", "image": "PMC-VQA/images/PMC6273024_molecules-21-01080-f004_405728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key anatomical features visible in the imaged body part, and how do they help in identifying the modality used?" }, { "from": "gpt", "value": "The answer is Cerebral vasculature, angiogram or similar vascular imaging technique" } ], "clip_score": 32.34375, "loss": 2.833901882171631 }, { "id": "identity_43364", "image": "DVQA/images/bar_train_00119159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the 'wait' store have any item that outsold the same item in the 'youth' store?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.5, "loss": 3.009479522705078 }, { "id": "identity_326893", "image": "PMC-VQA/images/PMC6858268_FIG2_69330.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the cause of the arterial extravasation seen in this image?\nChoices:\n(A) thrombus formation\n(B) atherosclerotic plaque\n(C) hemorrhage\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.4375, "loss": 4.079556941986084 }, { "id": "identity_104449", "image": "PMC-VQA/images/PMC3078894_F6_92691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more red objects or green objects in the image?" }, { "from": "gpt", "value": "The answer is More red objects" } ], "clip_score": 18.03125, "loss": 2.809418201446533 }, { "id": "identity_34569", "image": "VQA2.0/images/77195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are seated?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 18.28125, "loss": 2.1613399982452393 }, { "id": "identity_144590", "image": "MapQA/images/map_5298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does New York have a higher crowd percentage than Texas?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.90625, "loss": 2.8130791187286377 }, { "id": "identity_174710", "image": "TabMWP/images/38189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you have $1.00, how many green colored pencils can you buy?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 28.59375, "loss": 2.2813363075256348 }, { "id": "identity_31785", "image": "DocVQA/images/hzhd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the date mentioned in document?" }, { "from": "gpt", "value": "The answer is November 14, 1975" } ], "clip_score": 24.15625, "loss": 1.5830838680267334 }, { "id": "identity_336211", "image": "PMC-VQA/images/PMC4477478_Fig1_399126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the issue depicted in the image?\nChoices:\n(A) Thickened appendix wall\n(B) Dilated distal vessels arising from superior mesenteric artery\n(C) Thickened recto-sigmoid wall with abnormal vessels arising from inferior mesenteric artery\n(D) Normal colon wall with normal vessels." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.6875, "loss": 4.323462009429932 }, { "id": "identity_155522", "image": "ScienceQA/images/13775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state located to the southeast of the highlighted state?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 33.25, "loss": 3.0068702697753906 }, { "id": "identity_108277", "image": "Super-CLEVR/images/superCLEVR_new_001032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that has the same shape as the cyan object?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 21.5625, "loss": 3.8545355796813965 }, { "id": "identity_3077", "image": "ScienceQA/images/13025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Ayana's test show?\nChoices:\n(A) if adding the blade guards made the drone fly poorly\n(B) how much the drone weighed with the blade guards\n(C) if the blade guards would break in a crash" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.984375, "loss": 3.5820236206054688 }, { "id": "identity_193138", "image": "VQA2.0/images/214461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trees can be seen in the background of the image?" }, { "from": "gpt", "value": "The answer is Pine trees" } ], "clip_score": 12.25, "loss": 2.2709155082702637 }, { "id": "identity_22940", "image": "TabMWP/images/10780.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Julie looked over her calendar to see how many times she had soccer practice each month. What is the mode of the numbers?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 34.0, "loss": 1.9610679149627686 }, { "id": "identity_262301", "image": "PMC-VQA/images/PMC5524557_F3_194869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In relation to the statement, on which side of the abdomen is the right side located?\nChoices:\n(A) Left\n(B) Right\n(C) Upper\n(D) Lower" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.9375, "loss": 4.9047465324401855 }, { "id": "identity_28551", "image": "UniGeo/images/1184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,已知∠AOB=120°,则∠ACB等于()\nChoices:\n(A) 45°\n(B) 30°\n(C) 60°\n(D) 50°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.34375, "loss": 2.714181661605835 }, { "id": "identity_10240", "image": "UniGeo/images/2971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 一副三角板如图摆放,且∠1的度数比∠2度数大54度,则∠1的度数是()\nChoices:\n(A) 72\n(B) 70\n(C) 54\n(D) 18" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.90625, "loss": 3.0829455852508545 }, { "id": "identity_177497", "image": "IconQA/images/18149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this 3D shape?" }, { "from": "gpt", "value": "The answer is Cone" } ], "clip_score": 32.6875, "loss": 3.039989471435547 }, { "id": "identity_116507", "image": "VQA-AS/images/5907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the scooter the person is riding?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 31.5625, "loss": 2.780057430267334 }, { "id": "identity_24222", "image": "A-OKVQA/images/364691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of event is the skateboarder taking place in?\nChoices:\n(A) deathmatch\n(B) slalom\n(C) best trick\n(D) lap race" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.984375, "loss": 4.275313854217529 }, { "id": "identity_33550", "image": "DocVQA/images/ztnk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of this page which is written in top ?" }, { "from": "gpt", "value": "The answer is -3-" } ], "clip_score": 16.65625, "loss": 2.93268084526062 }, { "id": "identity_117523", "image": "VizWiz/images/VizWiz_train_00016351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the flowers in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.984375, "loss": 3.39855694770813 }, { "id": "identity_320712", "image": "A-OKVQA/images/262394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this type of vehicle known as?\nChoices:\n(A) minibike\n(B) dirt bike\n(C) scooter\n(D) cruiser" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.90625, "loss": 3.9365382194519043 }, { "id": "identity_148818", "image": "MapQA/images/map_4433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Alaska included in the value range analysis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.9375, "loss": 3.1044278144836426 }, { "id": "identity_212565", "image": "PMC-VQA/images/PMC4962499_Fig1_56203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is shown in the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is Doppler ultrasound" } ], "clip_score": 30.171875, "loss": 2.2273993492126465 }, { "id": "identity_137447", "image": "DocVQA/images/zjlk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the publication associated with the editor Mr. Ed Kummer?" }, { "from": "gpt", "value": "The answer is The Herald" } ], "clip_score": 22.671875, "loss": 3.422440767288208 }, { "id": "identity_78921", "image": "GeoQA+/images/5275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the regular octagon ABCDEFGH shown in the image, what is the measure of angle BAG?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 35.21875, "loss": 1.685788631439209 }, { "id": "identity_274395", "image": "A-OKVQA/images/439299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What new addition has been made to this surfboard?\nChoices:\n(A) wheels\n(B) seat\n(C) umbrella\n(D) pole" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.96875, "loss": 4.195435047149658 }, { "id": "identity_247220", "image": "VQA2.0/images/324246.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the elements visible in the image, describe the likely location and purpose of the structure in the foreground" }, { "from": "gpt", "value": "The answer is Beach parking management" } ], "clip_score": 32.625, "loss": 4.700104713439941 }, { "id": "identity_288910", "image": "TextVQA/images/a9ec28c332244746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this an individual who is a child?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 11.0859375, "loss": 3.2742042541503906 }, { "id": "identity_183071", "image": "VQA-AS/images/3861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the plant pot on the table?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.875, "loss": 3.3020758628845215 }, { "id": "identity_278717", "image": "DVQA/images/bar_train_00127280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of sales between the most sold item and the least sold item?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.421875, "loss": 2.8218801021575928 }, { "id": "identity_330108", "image": "IconQA/images/34556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of this is orange?\nChoices:\n(A) 1/5\n(B) 1/2\n(C) 1/3\n(D) 1/4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.71875, "loss": 3.5337531566619873 }, { "id": "identity_70380", "image": "TQA/images/question_images/cycle_carbon_5009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is represented by the arrows pointing from the sun to the trees and from the trees to the clouds?" }, { "from": "gpt", "value": "The answer is Photosynthesis and Plant respiration" } ], "clip_score": 30.921875, "loss": 2.2117271423339844 }, { "id": "identity_204593", "image": "TextVQA/images/290bf790e5420c37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main topic of the blog post featured in the image?" }, { "from": "gpt", "value": "The answer is Education or curriculum" } ], "clip_score": 27.390625, "loss": 4.068864822387695 }, { "id": "identity_290630", "image": "Super-CLEVR/images/superCLEVR_new_000341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red metal objects are located to the right of the tiny brown object?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 19.453125, "loss": 2.8811540603637695 }, { "id": "identity_189976", "image": "PMC-VQA/images/PMC9091813_F4_276614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image in color or black and white?" }, { "from": "gpt", "value": "The answer is Black and white" } ], "clip_score": 19.890625, "loss": 2.164447546005249 }, { "id": "identity_337375", "image": "PMC-VQA/images/PMC5156805_fig2_105366.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following chambers is separated by the membrane in this condition?\nChoices:\n(A) Right atrium and right ventricle \n(B) Right atrium and left atrium \n(C) Left atrium and left ventricle \n(D) Left ventricle and right ventricle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.25, "loss": 3.934208631515503 }, { "id": "identity_114365", "image": "A-OKVQA/images/413874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the player in the foreground wearing?" }, { "from": "gpt", "value": "The answer is Tennis shoes" } ], "clip_score": 25.953125, "loss": 2.2573189735412598 }, { "id": "identity_245592", "image": "VQA2.0/images/74090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field, including the batter, catcher, and any other players in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.125, "loss": 2.3354806900024414 }, { "id": "identity_244895", "image": "DVQA/images/bar_train_00069135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithm device for all the datasets?" }, { "from": "gpt", "value": "The answer is 5.4" } ], "clip_score": 33.40625, "loss": 2.313861608505249 }, { "id": "identity_237933", "image": "AI2D/images/1949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animals eat both the Banana Plant and the Palm Tree?" }, { "from": "gpt", "value": "The answer is Howler Monkey" } ], "clip_score": 28.703125, "loss": 2.6452581882476807 }, { "id": "identity_143424", "image": "DocVQA/images/sxhk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the person who took the trip?" }, { "from": "gpt", "value": "The answer is 603 W. Michigan Ave, Urbana, Illinois" } ], "clip_score": 19.640625, "loss": 1.5797324180603027 }, { "id": "identity_70930", "image": "TQA/images/question_images/human_system_respiratory_3595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part is responsible for preventing food from entering the airway?" }, { "from": "gpt", "value": "The answer is epiglottis" } ], "clip_score": 25.0, "loss": 1.910271167755127 }, { "id": "identity_20592", "image": "DVQA/images/bar_train_00136571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is preferred by the least number of people summed across all the categories?" }, { "from": "gpt", "value": "The answer is ten" } ], "clip_score": 35.28125, "loss": 3.149505138397217 }, { "id": "identity_275392", "image": "DocVQA/images/pfdd0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of income from sources other than receipts up to September 30th?" }, { "from": "gpt", "value": "The answer is 2,585.81" } ], "clip_score": 24.328125, "loss": 2.193481683731079 }, { "id": "identity_56151", "image": "FigureQA/images/26096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Orchid in the pie chart?" }, { "from": "gpt", "value": "The answer is 25%" } ], "clip_score": 37.71875, "loss": 1.493432641029358 }, { "id": "identity_183621", "image": "TextVQA/images/0193991401f83e64.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car closest to the bottom of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.578125, "loss": 3.4254958629608154 }, { "id": "identity_321613", "image": "A-OKVQA/images/538230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of vehicles are being raced here?\nChoices:\n(A) skate boards\n(B) tricycles\n(C) tractors\n(D) dirt bikes" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.171875, "loss": 3.7429358959198 }, { "id": "identity_40518", "image": "DVQA/images/bar_train_00104194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the topmost group of bars?" }, { "from": "gpt", "value": "The answer is hole" } ], "clip_score": 20.5625, "loss": 3.695080518722534 }, { "id": "identity_269632", "image": "TextVQA/images/bee12950e4213f2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what time was this document printed?" }, { "from": "gpt", "value": "The answer is january 1984" } ], "clip_score": 19.125, "loss": 1.9745450019836426 }, { "id": "identity_62471", "image": "DVQA/images/bar_train_00043443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of people who preferred 'sheet' objects across all categories?" }, { "from": "gpt", "value": "The answer is 1.8" } ], "clip_score": 30.875, "loss": 1.924050211906433 }, { "id": "identity_284612", "image": "DocVQA/images/qhfk0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What topics does Nancy Zucker-Brod address for the Associated Press?" }, { "from": "gpt", "value": "The answer is tobacco issues" } ], "clip_score": 21.765625, "loss": 3.467405319213867 }, { "id": "identity_20487", "image": "VQA2.0/images/164842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are skating?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 22.796875, "loss": 2.4048261642456055 }, { "id": "identity_169572", "image": "TabMWP/images/9510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the ceramic vase and the crystal vase?" }, { "from": "gpt", "value": "The answer is $27" } ], "clip_score": 32.40625, "loss": 2.8481719493865967 }, { "id": "identity_217584", "image": "PMC-VQA/images/PMC7251083_Fig4_178724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of tissue damage or pathology in the image?" }, { "from": "gpt", "value": "The answer is No visible signs of tissue damage or pathology are present" } ], "clip_score": 29.90625, "loss": 2.4188895225524902 }, { "id": "identity_90097", "image": "Geometry3K/images/181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the diagonals QT and RS intersect at point Z, what is the length of segment RZ if QR = 22, XY = 15, and TS = 8?" }, { "from": "gpt", "value": "The answer is 7.5" } ], "clip_score": 33.34375, "loss": 1.7436259984970093 }, { "id": "identity_326961", "image": "A-OKVQA/images/419610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is painted on the metal frame of the bicycle parked in front of the store?\nChoices:\n(A) black\n(B) red\n(C) pink\n(D) green" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.734375, "loss": 3.9008865356445312 }, { "id": "identity_157047", "image": "ScienceQA/images/11315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean lies directly east of the highlighted country?" }, { "from": "gpt", "value": "The answer is Pacific Ocean" } ], "clip_score": 31.171875, "loss": 2.549736738204956 }, { "id": "identity_128605", "image": "PMC-VQA/images/PMC3703402_fig2_216198.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the imaging technique used to obtain this picture?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.640625, "loss": 2.4006974697113037 }, { "id": "identity_231745", "image": "VizWiz/images/VizWiz_train_00016072.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chairs are visible in the image, and what kind of soda is on the table?" }, { "from": "gpt", "value": "The answer is Two chairs, Sprite" } ], "clip_score": 27.65625, "loss": 3.1623926162719727 }, { "id": "identity_95636", "image": "IconQA/images/102115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of points on the stars?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 26.421875, "loss": 1.7709574699401855 }, { "id": "identity_28463", "image": "GeoQA+/images/2040.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,点C、D都在⊙O上,若∠ABD=63°、∠DCO=24°,则∠BDC的度数是()\nChoices:\n(A) 15°\n(B) 24°\n(C) 39°\n(D) 63°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.8125, "loss": 2.4570915699005127 }, { "id": "identity_20933", "image": "A-OKVQA/images/185302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is available to get from the boats to the ground level?\nChoices:\n(A) rope\n(B) stairs\n(C) ladder\n(D) elevator" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.9375, "loss": 4.168972969055176 }, { "id": "identity_304507", "image": "DVQA/images/bar_train_00174539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this contain stacked bars?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.171875, "loss": 4.129334926605225 }, { "id": "identity_266294", "image": "PMC-VQA/images/PMC7848076_F6_356972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of echocardiography was utilized? \nOptions:\n(A) Transesophageal echocardiography\n(B) M-mode echocardiography\n(C) Doppler echocardiography\n(D) 3D echocardiography" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.53125, "loss": 4.7804460525512695 }, { "id": "identity_118551", "image": "VQA-AS/images/614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the dog jumping in the air?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 29.9375, "loss": 2.4759185314178467 }, { "id": "identity_143438", "image": "ChartQA/images/two_col_101216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage increase in turnover from 2014 to 2015?" }, { "from": "gpt", "value": "The answer is 28.91%" } ], "clip_score": 24.0625, "loss": 1.959143042564392 }, { "id": "identity_29087", "image": "FigureQA/images/30354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Gold the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.5373029708862305 }, { "id": "identity_293102", "image": "Super-CLEVR/images/superCLEVR_new_001495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the big motorbike, which shares the same color as the school bus, made of?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 28.5625, "loss": 3.7397961616516113 }, { "id": "identity_73303", "image": "ScienceQA/images/5715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the island to the west of the highlighted country?" }, { "from": "gpt", "value": "The answer is Cuba" } ], "clip_score": 33.21875, "loss": 3.5267748832702637 }, { "id": "identity_105274", "image": "A-OKVQA/images/234929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the table on which the plane is placed?" }, { "from": "gpt", "value": "The answer is Rectangular" } ], "clip_score": 12.453125, "loss": 2.565446138381958 }, { "id": "identity_160371", "image": "Geometry3K/images/1219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side SV in the parallelogram STVW?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 37.78125, "loss": 2.5796525478363037 }, { "id": "identity_54748", "image": "DocVQA/images/sqdl0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the MDD number given in the sales research proposal?" }, { "from": "gpt", "value": "The answer is 86-51102" } ], "clip_score": 22.890625, "loss": 2.521188497543335 }, { "id": "identity_156374", "image": "TQA/images/question_images/tides_2608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two types of tides shown in the image?" }, { "from": "gpt", "value": "The answer is High Tide and Low Tide" } ], "clip_score": 33.875, "loss": 1.790452241897583 }, { "id": "identity_198870", "image": "VQA-AS/images/13161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the flying disc on the ground?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.390625, "loss": 3.6718621253967285 }, { "id": "identity_89724", "image": "UniGeo/images/2134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment AB in the trapezoid ABCD?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.34375, "loss": 2.6169545650482178 }, { "id": "identity_1291", "image": "TextVQA/images/83c61f30e2d969b3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: can you park for 4 hours?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.40625, "loss": 3.165037155151367 }, { "id": "identity_205556", "image": "A-OKVQA/images/484273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cooking utensil is the person on the left holding?" }, { "from": "gpt", "value": "The answer is A spatula" } ], "clip_score": 21.421875, "loss": 2.957413673400879 }, { "id": "identity_157913", "image": "TQA/images/question_images/parts_fish_2909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the fin labeled as \"caudal fin\"?" }, { "from": "gpt", "value": "The answer is It is used for propulsion" } ], "clip_score": 25.9375, "loss": 2.90102481842041 }, { "id": "identity_51017", "image": "DVQA/images/bar_train_00056941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item \"maid\" across all stores?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 27.265625, "loss": 2.520660877227783 }, { "id": "identity_334165", "image": "ScienceQA/images/1547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these objects attract or repel each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.8125, "loss": 3.5573718547821045 }, { "id": "identity_314023", "image": "AI2D/images/201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify a predator from this diagram.\nChoices:\n(A) Forest floor\n(B) Animals\n(C) Soil\n(D) Trees" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.25, "loss": 3.9433834552764893 }, { "id": "identity_13626", "image": "ScienceQA/images/18647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the event chain, which house does the first pig live in at the end?\nChoices:\n(A) the straw house\n(B) the brick house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.203125, "loss": 3.0905354022979736 }, { "id": "identity_304929", "image": "GeoQA+/images/2272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,点C、D在⊙O上,且在AB异侧,连接OC、CD、DA.若∠BOC=130°,则∠D的大小是多少()\nChoices:\n(A) 15°\n(B) 25°\n(C) 35°\n(D) 50°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.3125, "loss": 2.4939639568328857 }, { "id": "identity_72019", "image": "ScienceQA/images/10928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the largest inclusion within the rock?" }, { "from": "gpt", "value": "The answer is 2 cm" } ], "clip_score": 28.96875, "loss": 2.3151650428771973 }, { "id": "identity_310497", "image": "FigureQA/images/76405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.3515625, "loss": 4.043065547943115 }, { "id": "identity_297352", "image": "ScienceQA/images/18156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following choices correctly identifies the direction of thermal energy transfer during this time: from each vial to the surroundings or from the surroundings to each vial?\nChoices:\n(A) each vial . . . the surroundings\n(B) the surroundings . . . each vial" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.15625, "loss": 3.987180709838867 }, { "id": "identity_244512", "image": "TextVQA/images/01d095f50659d413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific tasks is the person performing on the computer screens, and what software might they be using based on the visible interface elements?" }, { "from": "gpt", "value": "The answer is The person is performing video editing tasks using editing software" } ], "clip_score": 23.84375, "loss": 3.494450807571411 }, { "id": "identity_157345", "image": "ScienceQA/images/11278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of poles are facing each other in the bottom pair of magnets?" }, { "from": "gpt", "value": "The answer is North and North poles" } ], "clip_score": 20.09375, "loss": 2.8879220485687256 }, { "id": "identity_235617", "image": "Super-CLEVR/images/superCLEVR_new_000169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of big metal objects to small things?" }, { "from": "gpt", "value": "The answer is 5:4" } ], "clip_score": 15.8203125, "loss": 1.9672932624816895 }, { "id": "identity_65179", "image": "ScienceQA/images/12385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region is directly south of the highlighted area?" }, { "from": "gpt", "value": "The answer is The region directly below the highlighted area" } ], "clip_score": 26.546875, "loss": 3.2924129962921143 }, { "id": "identity_326952", "image": "IconQA/images/71566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Adjust the ruler to find the length of the line to the nearest centimeter. The line is approximately (_) centimeters long." }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 32.59375, "loss": 2.4814727306365967 }, { "id": "identity_246581", "image": "DocVQA/images/rxvh0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total budget recommended for the next five years if the center is incrementally funded at the same rate as FY 1976-1977?" }, { "from": "gpt", "value": "The answer is $2,382,340" } ], "clip_score": 17.828125, "loss": 1.950765609741211 }, { "id": "identity_20298", "image": "TextVQA/images/003c0cac1ea19d3e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what tournament is being held here?" }, { "from": "gpt", "value": "The answer is poker" } ], "clip_score": 27.53125, "loss": 2.7532947063446045 }, { "id": "identity_302728", "image": "DocVQA/images/mhng0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this program?" }, { "from": "gpt", "value": "The answer is DORAL B2GF LIGHTER" } ], "clip_score": 21.984375, "loss": 4.031811714172363 }, { "id": "identity_199661", "image": "Super-CLEVR/images/superCLEVR_new_000382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bicycle located between the green bus and the purple motorbike?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 36.09375, "loss": 3.4862380027770996 }, { "id": "identity_293903", "image": "VQA2.0/images/538272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of individuals present in this location?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 8.265625, "loss": 2.0030367374420166 }, { "id": "identity_275804", "image": "DVQA/images/bar_train_00108699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what margin is the most accurate algorithm superior to the least accurate algorithm in terms of accuracy?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 36.09375, "loss": 2.8753623962402344 }, { "id": "identity_310952", "image": "TQA/images/question_images/cycle_rock_6740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What step follows transportation?\nChoices:\n(A) solidification\n(B) lithification\n(C) weathering\n(D) deposition" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.453125, "loss": 4.140475273132324 }, { "id": "identity_75316", "image": "ScienceQA/images/9142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Rodrigo and Kathleen were to share their items equally, how many items would each person get?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.0, "loss": 3.0404961109161377 }, { "id": "identity_170000", "image": "TabMWP/images/26769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of paintings per exhibit?" }, { "from": "gpt", "value": "The answer is 67 and 83" } ], "clip_score": 34.28125, "loss": 2.8774592876434326 }, { "id": "identity_184954", "image": "VQA-AS/images/15475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many building blocks are scattered on the floor?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 25.171875, "loss": 2.3546359539031982 }, { "id": "identity_72841", "image": "TQA/images/question_images/human_system_digestive_3672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the digestive system is primarily responsible for water absorption?" }, { "from": "gpt", "value": "The answer is Large Intestine" } ], "clip_score": 30.90625, "loss": 1.3871448040008545 }, { "id": "identity_96239", "image": "TabMWP/images/22781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the longest wait time for a train at Happy Cow Farm?" }, { "from": "gpt", "value": "The answer is 3 hours and 30 minutes" } ], "clip_score": 31.375, "loss": 1.7239584922790527 }, { "id": "identity_94623", "image": "IconQA/images/16238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of angles present in all the shapes combined?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 25.65625, "loss": 2.4917027950286865 }, { "id": "identity_89926", "image": "Geometry3K/images/1957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale factor from the smaller triangle to the larger triangle?" }, { "from": "gpt", "value": "The answer is 4/3" } ], "clip_score": 32.625, "loss": 2.08994197845459 }, { "id": "identity_239533", "image": "ChartQA/images/14873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what percentage does the sum of No confidence and DK fall short of Confidence?" }, { "from": "gpt", "value": "The answer is 54%" } ], "clip_score": 38.8125, "loss": 2.201354503631592 }, { "id": "identity_253751", "image": "PlotQA/images/147112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year is the difference between the sum of the percentage of male workers in agriculture and industry and the percentage of male workers in services the greatest, and what is that difference?" }, { "from": "gpt", "value": "The answer is 1997, -30%" } ], "clip_score": 32.0, "loss": 2.606652021408081 }, { "id": "identity_124078", "image": "VQA2.0/images/566583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many picture frames can be seen on the wall shelf?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.578125, "loss": 4.703537940979004 }, { "id": "identity_43075", "image": "ChartQA/images/multi_col_100105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which religion maintained the same percentage from 2013 to 2014?" }, { "from": "gpt", "value": "The answer is Catholic, Mormon, Jewish, Muslim, Other non-Christian religion, No response given" } ], "clip_score": 25.921875, "loss": 2.532639503479004 }, { "id": "identity_304718", "image": "UniGeo/images/1020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,CD是⊙O的弦,∠ABD=62°,则∠DCB的度数是多少?\n\nChoices:\n(A) 28°\n(B) 30°\n(C) 59°\n(D) 62°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.75, "loss": 2.491459608078003 }, { "id": "identity_262251", "image": "VQA-AS/images/11466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the boy's feet touching the floor?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 20.859375, "loss": 3.6394248008728027 }, { "id": "identity_284743", "image": "A-OKVQA/images/522461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity are the individuals sitting in the snow engaged in?\nChoices:\n(A) waiting\n(B) sledding\n(C) gambling\n(D) skiing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.09375, "loss": 4.200182914733887 }, { "id": "identity_304904", "image": "PMC-VQA/images/PMC7510301_Fig3_259713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the blue area signify in the image?\nChoices:\n(A) GTV-T\n(B) GTV-N\n(C) PGTV\n(D) PTV" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.15625, "loss": 4.112210273742676 }, { "id": "identity_130815", "image": "TextVQA/images/b9bedb287ce4bc89.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color theme of the magazine cover?" }, { "from": "gpt", "value": "The answer is Pink and white" } ], "clip_score": 26.578125, "loss": 3.006258726119995 }, { "id": "identity_142818", "image": "DocVQA/images/rxfl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which city was the hearing held?" }, { "from": "gpt", "value": "The answer is Washington, D.C" } ], "clip_score": 23.984375, "loss": 2.2185685634613037 }, { "id": "identity_264880", "image": "CLEVR-Math/images/CLEVR_train_063113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After removing all gray balls and red cubes, how many balls remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.65625, "loss": 2.1526193618774414 }, { "id": "identity_113089", "image": "VQA2.0/images/140690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the tennis court?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.859375, "loss": 2.6471245288848877 }, { "id": "identity_64454", "image": "ScienceQA/images/385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the peninsula located on the eastern side of the highlighted country?" }, { "from": "gpt", "value": "The answer is Yucatan Peninsula" } ], "clip_score": 25.296875, "loss": 1.8248100280761719 }, { "id": "identity_138167", "image": "DocVQA/images/frvg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are listed under VNA Staff Development?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.875, "loss": 2.5614216327667236 }, { "id": "identity_143373", "image": "ChartQA/images/two_col_4558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest FIFA World Ranking position achieved by the Curaçao national football team within the given time frame?" }, { "from": "gpt", "value": "The answer is 75" } ], "clip_score": 24.484375, "loss": 2.2524685859680176 }, { "id": "identity_148233", "image": "MapQA/images/map_5388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for the New England states?" }, { "from": "gpt", "value": "The answer is 60-80" } ], "clip_score": 28.609375, "loss": 1.9480232000350952 }, { "id": "identity_124297", "image": "A-OKVQA/images/524255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many performers are on the horse?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.78125, "loss": 2.171238660812378 }, { "id": "identity_83262", "image": "Geometry3K/images/151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side AB in triangle ABC?" }, { "from": "gpt", "value": "The answer is AB = 182 inches" } ], "clip_score": 36.3125, "loss": 3.5480539798736572 }, { "id": "identity_684", "image": "AI2D/images/2382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What organism is finally produced in the diagram?\nChoices:\n(A) caterpiller\n(B) egg\n(C) larvae\n(D) butterfly" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.953125, "loss": 3.774419069290161 }, { "id": "identity_84117", "image": "GeoQA+/images/1827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that OE is perpendicular to AB and ∠DOB is 60°, what is the measure of ∠AOC?" }, { "from": "gpt", "value": "The answer is 150°" } ], "clip_score": 33.125, "loss": 1.6479064226150513 }, { "id": "identity_89973", "image": "GEOS/images/035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the side length of square JETS is doubled, what will be the new area of quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is 36" } ], "clip_score": 36.28125, "loss": 2.7807159423828125 }, { "id": "identity_50650", "image": "DVQA/images/bar_train_00024710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are liked by exactly 5 people?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 27.09375, "loss": 2.2276437282562256 }, { "id": "identity_39378", "image": "FigureQA/images/9808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many segments are there in the pie chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.09375, "loss": 2.6558141708374023 }, { "id": "identity_304547", "image": "DocVQA/images/mqxb0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mentioned date?" }, { "from": "gpt", "value": "The answer is 3 April 1969" } ], "clip_score": 19.609375, "loss": 1.7571343183517456 }, { "id": "identity_26388", "image": "PMC-VQA/images/PMC6062049_pone.0201004.g003_346459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which body part was imaged in the patient? \nChoices:\n(A) Brain \n(B) Thighs \n(C) Arm \n(D) Chest " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.390625, "loss": 4.2556962966918945 }, { "id": "identity_388", "image": "VizWiz/images/VizWiz_train_00006829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is that?" }, { "from": "gpt", "value": "The answer is mouse" } ], "clip_score": 21.90625, "loss": 3.0609898567199707 }, { "id": "identity_23821", "image": "VizWiz/images/VizWiz_train_00012329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what this is?" }, { "from": "gpt", "value": "The answer is ibuprofen 200mg liquid softgels" } ], "clip_score": 35.125, "loss": 1.301887035369873 }, { "id": "identity_335709", "image": "DocVQA/images/txcx0227_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of birth given this document?" }, { "from": "gpt", "value": "The answer is August 6, 1942" } ], "clip_score": 22.59375, "loss": 2.123037815093994 }, { "id": "identity_10533", "image": "PlotQA/images/107027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all years, what is the minimum number of technicians ?" }, { "from": "gpt", "value": "The answer is 45.33696" } ], "clip_score": 16.75, "loss": 3.07418155670166 }, { "id": "identity_316942", "image": "IconQA/images/24933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Adjust the ruler to find the length of the line to the nearest centimeter. The line is approximately (_) centimeters long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.6875, "loss": 2.660466194152832 }, { "id": "identity_20492", "image": "Super-CLEVR/images/superCLEVR_new_000693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a object that is in front of the tiny metallic minivan and on the right side of the brown motorbike; what material is it made of?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 28.859375, "loss": 3.0121586322784424 }, { "id": "identity_58847", "image": "FigureQA/images/36520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the color Crimson?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 20.890625, "loss": 2.6060988903045654 }, { "id": "identity_218374", "image": "A-OKVQA/images/390213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the tennis player's outfit?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 32.625, "loss": 3.672816753387451 }, { "id": "identity_63844", "image": "ScienceQA/images/19733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in this food web is a primary consumer?" }, { "from": "gpt", "value": "The answer is Pine vole and Swallowtail caterpillar" } ], "clip_score": 25.265625, "loss": 1.9318232536315918 }, { "id": "identity_298979", "image": "TextVQA/images/c7761fb8ebae3837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the book?" }, { "from": "gpt", "value": "The answer is the street-smart naturalist" } ], "clip_score": 36.875, "loss": 2.228255033493042 }, { "id": "identity_83868", "image": "Geometry3K/images/212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of all angles in the given diagram?" }, { "from": "gpt", "value": "The answer is 360 degrees" } ], "clip_score": 32.1875, "loss": 1.7017492055892944 }, { "id": "identity_18010", "image": "A-OKVQA/images/20671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being given to the dog here?\nChoices:\n(A) water\n(B) melon\n(C) burgers\n(D) nothing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.1171875, "loss": 3.9468154907226562 }, { "id": "identity_118717", "image": "VQA-AS/images/19673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toy blocks are visible on the floor?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 20.234375, "loss": 2.419994354248047 }, { "id": "identity_46929", "image": "FigureQA/images/84210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the x-axis value for the Dark Magenta dot that is closest to the 100 mark on the y-axis?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.921875, "loss": 2.803187131881714 }, { "id": "identity_175243", "image": "IconQA/images/21836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the stars the same size?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.203125, "loss": 3.3660671710968018 }, { "id": "identity_171417", "image": "IconQA/images/33884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 2 more lightning bolts were added to the bottom ten frame, how many empty squares would remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.0, "loss": 2.323422908782959 }, { "id": "identity_329260", "image": "IconQA/images/94845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there an odd or even number of items here?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.90625, "loss": 4.072691440582275 }, { "id": "identity_2618", "image": "VQA-RAD/images/synpic48714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What category is the hemmorhage?" }, { "from": "gpt", "value": "The answer is Subarachnoid" } ], "clip_score": 19.1875, "loss": 2.062755584716797 }, { "id": "identity_260147", "image": "IconQA/images/57368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the digits of the number shown?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.515625, "loss": 2.271338701248169 }, { "id": "identity_64252", "image": "TQA/images/question_images/parts_fish_2908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which fin is responsible for the balance and staying upright in the water?" }, { "from": "gpt", "value": "The answer is Dorsal fin" } ], "clip_score": 30.59375, "loss": 1.986189603805542 }, { "id": "identity_223743", "image": "VQA2.0/images/107584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does this scene depict?" }, { "from": "gpt", "value": "The answer is Early morning or late evening" } ], "clip_score": 12.4453125, "loss": 3.618272066116333 }, { "id": "identity_233677", "image": "DocVQA/images/sngf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of The American Public Health Association, Inc., including the city and state?" }, { "from": "gpt", "value": "The answer is 1790 Broadway, New York, N.Y" } ], "clip_score": 24.125, "loss": 1.7272610664367676 }, { "id": "identity_299167", "image": "GeoQA+/images/4839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, if a 80° angle is removed from quadrilateral ABCD to form pentagon BCDEF, what is the sum of angles ∠1 and ∠2?\nChoices:\n(A) 240°\n(B) 260°\n(C) 280°\n(D) 300°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.96875, "loss": 2.478109359741211 }, { "id": "identity_71096", "image": "AI2D/images/1847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the secondary consumer in the food web that feeds on the caterpillar" }, { "from": "gpt", "value": "The answer is Blackbird" } ], "clip_score": 30.984375, "loss": 3.510265588760376 }, { "id": "identity_159590", "image": "GeoQA+/images/5259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with angle B equal to 60 degrees and angle C equal to 70 degrees, and circle O with diameter AB, find the measure of angle COD" }, { "from": "gpt", "value": "The answer is 180 degrees" } ], "clip_score": 33.6875, "loss": 1.9975632429122925 }, { "id": "identity_230222", "image": "GeoQA+/images/113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,∠ACB=90°,M、N分别是AB、AC的中点,延长BC至点D,使CD=\\frac{1}{3}×BD,连接DM、DN、MN、CM.若AB=6,求DM的值。" }, { "from": "gpt", "value": "The answer is \\( \\sqrt{17} \\)" } ], "clip_score": 30.28125, "loss": 3.0342633724212646 }, { "id": "identity_124886", "image": "VQA2.0/images/142749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding surfboards?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.28125, "loss": 2.2505390644073486 }, { "id": "identity_20791", "image": "PMC-VQA/images/PMC6201609_f0001_386086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the CT scan image?\nChoices:\n(A) Right cervical mass \n(B) Left cervical mass \n(C) Middle cervical mass \n(D) No mass" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.921875, "loss": 4.41060733795166 }, { "id": "identity_167560", "image": "IconQA/images/27939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the letter 'G' in grid units before the transformation?" }, { "from": "gpt", "value": "The answer is The perimeter of the letter 'G' is 20 grid units before the transformation" } ], "clip_score": 32.40625, "loss": 1.4055231809616089 }, { "id": "identity_312821", "image": "Super-CLEVR/images/superCLEVR_new_000169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large metal items than smaller objects?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 20.0, "loss": 4.901883602142334 }, { "id": "identity_161256", "image": "UniGeo/images/1090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that O is the midpoint of BC and the distances from A, D, and C to point O are equal, what is the length of segment AD if AB is 5 units?" }, { "from": "gpt", "value": "The answer is 5*sqrt(2) units" } ], "clip_score": 29.25, "loss": 1.7464509010314941 }, { "id": "identity_309581", "image": "ScienceQA/images/19553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these objects attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.734375, "loss": 3.562119960784912 }, { "id": "identity_226699", "image": "TabMWP/images/3019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each twig has 10 beetles, how many beetles are on 7 twigs?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 34.875, "loss": 1.684398889541626 }, { "id": "identity_243369", "image": "Geometry3K/images/115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If \\( F K = 3x - 1 \\), \\( K G = 4y + 3 \\), \\( J K = 6y - 2 \\), and \\( K H = 2x + 3 \\), find the values of \\( x \\) and \\( y \\) so that the quadrilateral is a parallelogram" }, { "from": "gpt", "value": "The answer is \\( x = 4 \\) and \\( y = \\frac{5}{2} \\)" } ], "clip_score": 30.578125, "loss": 1.4579027891159058 }, { "id": "identity_299330", "image": "Super-CLEVR/images/superCLEVR_new_001310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are located behind the small red metal car or the big yellow tandem bikes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.75, "loss": 2.2791779041290283 }, { "id": "identity_181004", "image": "IconQA/images/5834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Assuming the pyramid is made of paper and the blue face is the base, how many edges need to be folded to form the pyramid?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.375, "loss": 2.1751863956451416 }, { "id": "identity_271636", "image": "TQA/images/question_images/cycle_rock_6739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What process occurs prior to the formation of sedimentary rocks from sediments?\nChoices:\n(A) cooling\n(B) burial, compaction and cementation\n(C) burial\n(D) weathering" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.71875, "loss": 3.975054979324341 }, { "id": "identity_22051", "image": "PMC-VQA/images/PMC6388372_ijerph-16-00414-f003_440267.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the images? \nChoices:\n(A) Cells under a microscope. \n(B) An animal’s bone structure. \n(C) The cross-section of a leaf. \n(D) The surface and cross-section of a material. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.75, "loss": 3.1165521144866943 }, { "id": "identity_319770", "image": "AI2D/images/3977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which is the center part of this diagram?\nChoices:\n(A) basal ring\n(B) radial spine\n(C) apical ring\n(D) apical window" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.78125, "loss": 4.211993217468262 }, { "id": "identity_320324", "image": "IconQA/images/107100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there an even or odd number of triangles?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.59375, "loss": 3.9084465503692627 }, { "id": "identity_104569", "image": "A-OKVQA/images/171950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What company's logo is visible on the batter's helmet?" }, { "from": "gpt", "value": "The answer is New Era" } ], "clip_score": 22.75, "loss": 2.808328628540039 }, { "id": "identity_235264", "image": "TabMWP/images/35075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of students attending all the elementary schools combined, and how many more students attend North River Elementary than the average number of students per school?" }, { "from": "gpt", "value": "The answer is 166.2 students" } ], "clip_score": 36.71875, "loss": 3.3750829696655273 }, { "id": "identity_270140", "image": "DVQA/images/bar_train_00041693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all the stores, which item had the highest total number of units sold?" }, { "from": "gpt", "value": "The answer is clouds" } ], "clip_score": 27.90625, "loss": 3.0448992252349854 }, { "id": "identity_40966", "image": "DVQA/images/bar_train_00179293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the red bar for \"camera\"?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 27.75, "loss": 2.6009013652801514 }, { "id": "identity_321470", "image": "AI2D/images/2214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, through which process do diploid zygotes become spores?\n\nChoices:\n(A) haploid gametes\n(B) fertilization\n(C) multicellular haploid\n(D) meiosis" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.28125, "loss": 3.782283067703247 }, { "id": "identity_189702", "image": "PMC-VQA/images/PMC7684285_Fig4_303751.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many screws can be counted in the image?" }, { "from": "gpt", "value": "The answer is Count the number of visible screws in the image" } ], "clip_score": 18.640625, "loss": 2.0058445930480957 }, { "id": "identity_276159", "image": "UniGeo/images/1414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, PA, PB, and CD are tangent to circle O at points A, B, and E respectively. If ∠COD = 50°, what is the measure of ∠P?\nChoices:\n(A) 80°\n(B) 55°\n(C) 130°\n(D) 65°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.21875, "loss": 2.3552565574645996 }, { "id": "identity_275126", "image": "TabMWP/images/21492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the increase in the population density of Nevada per square mile from 1990 to 2000, as shown in the almanac data Joy found at the library?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 37.03125, "loss": 2.545081615447998 }, { "id": "identity_156094", "image": "TQA/images/question_images/earth_moon_phases_2560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many phases of the moon are depicted in this image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.34375, "loss": 4.0223612785339355 }, { "id": "identity_329366", "image": "PMC-VQA/images/PMC5033377_pone.0162969.g002_76274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of cells were shown in this figure?\nChoices:\n(A) Astrocytes\n(B) Cr+ neurons\n(C) Sensory neurons\n(D) Motor neurons" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.296875, "loss": 3.9890666007995605 }, { "id": "identity_318672", "image": "VQA2.0/images/160064.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are sitting?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 14.9140625, "loss": 2.3391878604888916 }, { "id": "identity_122246", "image": "VQA-RAD/images/synpic54802.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What abnormality can be observed in the right hemisphere of the brain?" }, { "from": "gpt", "value": "The answer is Hyperintensity in the right hemisphere" } ], "clip_score": 29.53125, "loss": 2.2263102531433105 }, { "id": "identity_28862", "image": "DocVQA/images/kzbf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where will be the meeting held?" }, { "from": "gpt", "value": "The answer is EAST PAVILION CONFERENCE ROOM" } ], "clip_score": 19.078125, "loss": 1.5280998945236206 }, { "id": "identity_146273", "image": "ChartQA/images/two_col_100846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest population share percentage depicted in the graph?" }, { "from": "gpt", "value": "The answer is 27%" } ], "clip_score": 26.5625, "loss": 1.9058566093444824 }, { "id": "identity_81053", "image": "UniGeo/images/2584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given rhombus ABCD with diagonal AC, if AB = 2 cm and angle B = 120°, what is the length of diagonal AC?" }, { "from": "gpt", "value": "The answer is 2√3 cm" } ], "clip_score": 32.0, "loss": 1.445807695388794 }, { "id": "identity_285663", "image": "IconQA/images/14500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does Mason's watch show while he is jumping in the mud on a rainy afternoon? Is it 3:00 P.M. or 3:00 A.M.?\nChoices:\n(A) 3:00 P.M.\n(B) 3:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.015625, "loss": 3.502133846282959 }, { "id": "identity_285293", "image": "DocVQA/images/sngx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the grant number for the NFI?" }, { "from": "gpt", "value": "The answer is 517E" } ], "clip_score": 18.15625, "loss": 3.353742837905884 }, { "id": "identity_296546", "image": "UniGeo/images/1197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, the radius of circle O, OC, is 10cm. Line AB is perpendicular to OC, with the foot of the perpendicular being H, intersecting circle O at points A and B. If AB measures 12cm, then when line AB is translated downwards along OC to be tangent to circle O, what is the distance of the translation?\nChoices:\n(A) 2cm\n(B) 3cm\n(C) 4cm\n(D) 5cm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.875, "loss": 2.1365575790405273 }, { "id": "identity_99554", "image": "TabMWP/images/22342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of miles Bridget hiked during her camping trip?" }, { "from": "gpt", "value": "The answer is 20 miles" } ], "clip_score": 34.46875, "loss": 2.2124428749084473 }, { "id": "identity_230483", "image": "PMC-VQA/images/PMC6032807_F14_338297.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique, known for its detailed cross-sectional images, revealed the sclerosis of manubrium sterni with medial clavicles, and what are the primary advantages of this technique over MRI and X-ray in diagnosing bone abnormalities?" }, { "from": "gpt", "value": "The answer is CT provides detailed cross-sectional images and detects subtle bone abnormalities" } ], "clip_score": 30.59375, "loss": 2.1260550022125244 }, { "id": "identity_169462", "image": "IconQA/images/63586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the hour and minute hands of the watch?" }, { "from": "gpt", "value": "The answer is 165 degrees" } ], "clip_score": 28.46875, "loss": 2.1431453227996826 }, { "id": "identity_29651", "image": "VQA2.0/images/308892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the box on the bike say?" }, { "from": "gpt", "value": "The answer is sierra phantom - 1946" } ], "clip_score": 27.34375, "loss": 3.3321805000305176 }, { "id": "identity_249265", "image": "GeoQA+/images/484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point A moves along ray OP and point B moves along ray OM, and AE and BE are the angle bisectors of ∠BAO and ∠ABO respectively, what is the measure of ∠AEB when ∠BAO is 60°?" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 32.90625, "loss": 1.5230071544647217 }, { "id": "identity_266850", "image": "AI2D/images/298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If snakes were hunted and killed, which organisms in the food chain would be impacted? \n(A) lion\n(B) wild cat\n(C) kites\n(D) rabbit" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.53125, "loss": 3.8797435760498047 }, { "id": "identity_303329", "image": "FigureQA/images/15431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle greater than another color?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.078125, "loss": 3.8822989463806152 }, { "id": "identity_22485", "image": "DocVQA/images/nxyc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the report?" }, { "from": "gpt", "value": "The answer is December 1961" } ], "clip_score": 26.90625, "loss": 2.2920241355895996 }, { "id": "identity_60537", "image": "DocVQA/images/skcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many copies of \"A Report on Health Care of the Poor Program & Policies\" are enclosed?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 19.84375, "loss": 3.741856336593628 }, { "id": "identity_246960", "image": "A-OKVQA/images/330649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the safety concerns, what are the potential risks for the person in the car with the door open, and what measures could be taken to mitigate these risks?" }, { "from": "gpt", "value": "The answer is Collision, door damage, personal injury; park safely, use hazard lights" } ], "clip_score": 20.046875, "loss": 3.873187780380249 }, { "id": "identity_333169", "image": "TabMWP/images/12045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the rate of change in the number of entries between 2014 and 2015 according to the table?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.46875, "loss": 3.0224764347076416 }, { "id": "identity_317627", "image": "ScienceQA/images/11592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which story is set in the eighteenth century according to the table?\nChoices:\n(A) The Time Machine\n(B) Rip Van Winkle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.828125, "loss": 3.5694968700408936 }, { "id": "identity_191252", "image": "VizWiz/images/VizWiz_train_00012755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the packaging of the flour bag?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 32.1875, "loss": 3.540499210357666 }, { "id": "identity_174256", "image": "IconQA/images/48300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the rectangle?" }, { "from": "gpt", "value": "The answer is 15 cm^2" } ], "clip_score": 31.8125, "loss": 1.637647032737732 }, { "id": "identity_197017", "image": "VQA2.0/images/39080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit is visible on the plate to the right of the orange juice pitcher?" }, { "from": "gpt", "value": "The answer is Bananas" } ], "clip_score": 24.390625, "loss": 2.8723957538604736 }, { "id": "identity_166066", "image": "TabMWP/images/30640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles sold by all dealerships combined?" }, { "from": "gpt", "value": "The answer is 138" } ], "clip_score": 30.25, "loss": 2.0248639583587646 }, { "id": "identity_124917", "image": "PMC-VQA/images/PMC6833318_RSIF20190454F2_62785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the x-axis in the image?" }, { "from": "gpt", "value": "The answer is Horizontal from left to right" } ], "clip_score": 14.7109375, "loss": 3.0868401527404785 }, { "id": "identity_83432", "image": "GeoQA+/images/1882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB = 6, BC = 4, and DF = 9, what is the length of segment EF?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.3125, "loss": 2.5437426567077637 }, { "id": "identity_58296", "image": "FigureQA/images/70794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis value does Orchid intersect with Light Gold?" }, { "from": "gpt", "value": "The answer is Approximately 38" } ], "clip_score": 36.59375, "loss": 2.695221424102783 }, { "id": "identity_21926", "image": "VQA2.0/images/162295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What food is this?" }, { "from": "gpt", "value": "The answer is pizza" } ], "clip_score": 22.0625, "loss": 2.5023105144500732 }, { "id": "identity_153210", "image": "ScienceQA/images/805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly north of the highlighted state?" }, { "from": "gpt", "value": "The answer is Wyoming" } ], "clip_score": 32.625, "loss": 2.500319242477417 }, { "id": "identity_76997", "image": "TQA/images/question_images/faults_1738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How are the layers of earth deformed at a Divergent Plate Boundary compared to a Transform Plate Boundary?" }, { "from": "gpt", "value": "The answer is Divergent stretches/thins, Transform has lateral shift" } ], "clip_score": 31.375, "loss": 2.7143049240112305 }, { "id": "identity_37305", "image": "AI2D/images/3187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is at the very top of the plant?\nChoices:\n(A) shoot tip\n(B) leaf\n(C) none of the above\n(D) root tip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.515625, "loss": 4.2264204025268555 }, { "id": "identity_6723", "image": "TQA/images/abc_question_images/human_system_muscular_16157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the corresponding label for the pectoralis major?\nChoices:\n(A) Y\n(B) C\n(C) E\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.5, "loss": 4.409230709075928 }, { "id": "identity_295432", "image": "PMC-VQA/images/PMC9175008_F4_304069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of vessels are indicated by the yellow arrowheads in the illustration? \n(A) Arteries \n(B) Veins \n(C) Capillaries \n(D) Lymphatics" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.34375, "loss": 4.400464057922363 }, { "id": "identity_228675", "image": "FigureQA/images/75409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the second largest area under the curve?" }, { "from": "gpt", "value": "The answer is Blue Violet" } ], "clip_score": 23.890625, "loss": 2.3084988594055176 }, { "id": "identity_211724", "image": "VQA2.0/images/174185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the architectural style of the building in the background?" }, { "from": "gpt", "value": "The answer is Historical or traditional European architecture" } ], "clip_score": 21.375, "loss": 3.9252407550811768 }, { "id": "identity_233351", "image": "Super-CLEVR/images/superCLEVR_new_001921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in the image, and what are they?" }, { "from": "gpt", "value": "The answer is Four objects: car, motorcycle, road bike, aeroplane" } ], "clip_score": 29.875, "loss": 2.9917209148406982 }, { "id": "identity_227090", "image": "Super-CLEVR/images/superCLEVR_new_000751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the metallic bicycle and the red metal object in front of the tiny red thing both smaller than the green car?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 32.71875, "loss": 5.092662811279297 }, { "id": "identity_284077", "image": "IconQA/images/30834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Estimate the number of marbles. Which of the following choices is the closest estimate:\n(A) approximately 30\n(B) approximately 90" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.140625, "loss": 3.3437461853027344 }, { "id": "identity_66984", "image": "AI2D/images/1836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is a common prey for multiple predators in the food web?" }, { "from": "gpt", "value": "The answer is Mouse" } ], "clip_score": 32.53125, "loss": 2.7907474040985107 }, { "id": "identity_332577", "image": "TabMWP/images/1696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: While looking through an almanac at the library, Steven noticed some data showing the population density of various states. In 1990, which state had a lower population density, North Dakota or Nevada?\nChoices:\n(A) Nevada\n(B) North Dakota" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.53125, "loss": 3.4603476524353027 }, { "id": "identity_182153", "image": "PMC-VQA/images/PMC4378709_fig6_373043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the mass indicated by the arrow?" }, { "from": "gpt", "value": "The answer is Small, exact measurement not possible" } ], "clip_score": 18.484375, "loss": 4.164656162261963 }, { "id": "identity_211869", "image": "VQA2.0/images/143528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the person in the red jersey wearing?" }, { "from": "gpt", "value": "The answer is Sneakers" } ], "clip_score": 20.328125, "loss": 2.0572400093078613 }, { "id": "identity_333623", "image": "VQA2.0/images/28311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many creatures are in the picture?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.265625, "loss": 2.035698652267456 }, { "id": "identity_304376", "image": "UniGeo/images/3131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,把一个直角三角板的直角顶点放在直尺的一边上,若∠2=42°,则∠1=()\nChoices:\n(A) 48°\n(B) 42°\n(C) 40°\n(D) 45°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.25, "loss": 2.8233706951141357 }, { "id": "identity_281238", "image": "AI2D/images/2019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism in the food web diagram is classified as the producer?\nChoices:\n(A) herbivores\n(B) decomposers\n(C) plants\n(D) primary carnivores" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.1875, "loss": 3.927431106567383 }, { "id": "identity_190112", "image": "TextVQA/images/f1cc427fa186604e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company featured in the image?" }, { "from": "gpt", "value": "The answer is KOKUYO" } ], "clip_score": 25.359375, "loss": 2.140082597732544 }, { "id": "identity_43755", "image": "DVQA/images/bar_train_00050159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any datasets with an accuracy exactly at 5?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.0625, "loss": 3.1427242755889893 }, { "id": "identity_294339", "image": "Super-CLEVR/images/superCLEVR_new_001014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the object that is located in front of the blue object and to the right of the small tandem bike?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 23.40625, "loss": 3.958117961883545 }, { "id": "identity_280878", "image": "PlotQA/images/25903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest cereal yield recorded across all years?" }, { "from": "gpt", "value": "The answer is 966.347" } ], "clip_score": 26.15625, "loss": 2.9082753658294678 }, { "id": "identity_145465", "image": "MapQA/images/map_8734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value of less than 3M signs?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 27.25, "loss": 3.811608076095581 }, { "id": "identity_306451", "image": "DocVQA/images/ymhm0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of this page?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 14.984375, "loss": 4.11447811126709 }, { "id": "identity_182558", "image": "PMC-VQA/images/PMC7849607_F2_357213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to produce this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.421875, "loss": 2.485652208328247 }, { "id": "identity_101006", "image": "CLEVR-Math/images/CLEVR_train_010888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.40625, "loss": 2.4920670986175537 }, { "id": "identity_228806", "image": "A-OKVQA/images/214924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the country whose product is being served in the restaurant, and also determine the type of beverage being consumed" }, { "from": "gpt", "value": "The answer is Mexico, beer" } ], "clip_score": 20.015625, "loss": 3.42195463180542 }, { "id": "identity_178283", "image": "IconQA/images/13830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stars are in each row?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.34375, "loss": 2.277971029281616 }, { "id": "identity_288809", "image": "PMC-VQA/images/PMC9356014_Fig1_365449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the third row of the diagram?\nChoices:\n(A) Anterior vascular leakage grading, \n(B) Posterior vascular leakage grading, \n(C) Macular hyperfluorescence grading, \n(D) Leakage domain grading." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.6875, "loss": 4.376317024230957 }, { "id": "identity_163460", "image": "GeoQA+/images/1864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with points D, E, and F as shown in the diagram, if AB = 10, BC = 8, and DE = 4.5, what is the measure of angle ADB?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 34.8125, "loss": 2.4439892768859863 }, { "id": "identity_188457", "image": "TextVQA/images/24fac58b751faa9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the watch's bezel?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 24.46875, "loss": 3.399831533432007 }, { "id": "identity_176849", "image": "CLEVR-Math/images/CLEVR_train_005254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the purple cylinder?" }, { "from": "gpt", "value": "The answer is 3 (1 red block, 1 green cube, and 1 blue cube)" } ], "clip_score": 29.046875, "loss": 1.5322327613830566 }, { "id": "identity_67103", "image": "ScienceQA/images/3171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the body of water shown in the image likely to be a sea or an ocean?" }, { "from": "gpt", "value": "The answer is Sea" } ], "clip_score": 20.890625, "loss": 3.447721004486084 }, { "id": "identity_328506", "image": "PMC-VQA/images/PMC2614448_f4_32142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the invaginated structure observed in the topmost spherule?\nChoices:\n(A) Synaptic ribbon\n(B) Cross-section of a rod soma\n(C) HCat process\n(D) None of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.359375, "loss": 4.648108005523682 }, { "id": "identity_288758", "image": "ScienceQA/images/19758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New Jersey?\nChoices:\n(A) Provo\n(B) Trenton\n(C) Newark\n(D) Jersey City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.53125, "loss": 4.079249382019043 }, { "id": "identity_83459", "image": "GeoQA+/images/1208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C在⊙O上,∠ABC=31°,过点C作⊙O的切线交OA的延长线于点D.若∠ACB=90°,则∠C的大小为多少度?" }, { "from": "gpt", "value": "The answer is 59°" } ], "clip_score": 30.09375, "loss": 1.9699679613113403 }, { "id": "identity_183669", "image": "PMC-VQA/images/PMC7737277_Fig2_321109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to capture this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 32.40625, "loss": 2.521631956100464 }, { "id": "identity_149107", "image": "FigureQA/images/9610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors have a value less than 30?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.890625, "loss": 2.485020637512207 }, { "id": "identity_336174", "image": "DocVQA/images/qqdb0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is this event held?" }, { "from": "gpt", "value": "The answer is April 19-20, 1977" } ], "clip_score": 24.046875, "loss": 1.374312162399292 }, { "id": "identity_138333", "image": "MapQA/images/map_607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of Florida?" }, { "from": "gpt", "value": "The answer is 46.0%-46.0%" } ], "clip_score": 26.9375, "loss": 1.9944283962249756 }, { "id": "identity_251451", "image": "ScienceQA/images/5633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the left magnet is rotated 180 degrees around its center, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is repel" } ], "clip_score": 23.859375, "loss": 2.4206526279449463 }, { "id": "identity_121228", "image": "VQA2.0/images/297462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many poles are visible on the ferry dock?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 27.609375, "loss": 4.487771034240723 }, { "id": "identity_126094", "image": "Super-CLEVR/images/superCLEVR_new_000288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.28125, "loss": 3.096754789352417 }, { "id": "identity_191955", "image": "PMC-VQA/images/PMC8338867_Fig1_26815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the electrode positioned centrally within the brain?" }, { "from": "gpt", "value": "The answer is Yes, the electrode is positioned centrally within the brain" } ], "clip_score": 29.296875, "loss": 1.2097936868667603 }, { "id": "identity_280778", "image": "DocVQA/images/fhwc0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many samples are there on the Diet in total?" }, { "from": "gpt", "value": "The answer is 12,781" } ], "clip_score": 21.6875, "loss": 2.198538064956665 }, { "id": "identity_129869", "image": "A-OKVQA/images/550287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.375, "loss": 2.3303842544555664 }, { "id": "identity_224324", "image": "VQA2.0/images/224125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buses are there in total, and what fraction of them are double deckers?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 25.71875, "loss": 3.549895763397217 }, { "id": "identity_274494", "image": "A-OKVQA/images/67015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of bears present? \nChoices:\n(A) two\n(B) twenty\n(C) three\n(D) none" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.0, "loss": 3.7333340644836426 }, { "id": "identity_221800", "image": "TextVQA/images/b3f6e1220246af6a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different brands of fireworks are displayed?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.953125, "loss": 2.325770139694214 }, { "id": "identity_51779", "image": "DocVQA/images/nfnw0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total price of the books ordered?" }, { "from": "gpt", "value": "The answer is 1640.00" } ], "clip_score": 18.4375, "loss": 2.197216272354126 }, { "id": "identity_335314", "image": "DVQA/images/bar_train_00155703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars each a single solid color without any patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.4375, "loss": 3.359240770339966 }, { "id": "identity_99502", "image": "IconQA/images/97910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove one diamond, what percentage of the original number of diamonds is left?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 20.984375, "loss": 1.3448907136917114 }, { "id": "identity_214104", "image": "PMC-VQA/images/PMC8598322_fig4_114321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other colors visible in the liquid besides the main color?" }, { "from": "gpt", "value": "The answer is No, only one color is visible" } ], "clip_score": 20.21875, "loss": 2.467116594314575 }, { "id": "identity_104201", "image": "VQA-RAD/images/synpic48714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of skull fracture in this image?" }, { "from": "gpt", "value": "The answer is No evidence of skull fracture" } ], "clip_score": 26.25, "loss": 2.3319573402404785 }, { "id": "identity_72559", "image": "AI2D/images/304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animal in this food chain is an omnivore?" }, { "from": "gpt", "value": "The answer is bear" } ], "clip_score": 29.671875, "loss": 2.991926908493042 }, { "id": "identity_19701", "image": "Super-CLEVR/images/superCLEVR_new_001110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many other objects are there of the same material as the big purple sedan?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.90625, "loss": 2.4354746341705322 }, { "id": "identity_12650", "image": "FigureQA/images/15088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.03125, "loss": 4.245242118835449 }, { "id": "identity_162689", "image": "GeoQA+/images/3968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with M as the midpoint of BC, AN bisecting angle BAC, and BN perpendicular to AN, find the measure of angle BAN given that AB = 14 and AC = 19" }, { "from": "gpt", "value": "The answer is arctan(√(196 - 266)/14)" } ], "clip_score": 34.40625, "loss": 2.3456432819366455 }, { "id": "identity_164800", "image": "GeoQA+/images/638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After the ladder slides down 4m, how far will the bottom of the ladder be from the wall?" }, { "from": "gpt", "value": "The answer is distance from wall = sqrt((25m)^2 - (21m)^2)" } ], "clip_score": 28.40625, "loss": 1.9898983240127563 }, { "id": "identity_244775", "image": "DocVQA/images/ftmf0024_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in the Business Contact Program to gain participation from businesses with a large number of employees, and what should be done if larger companies cannot be obtained?" }, { "from": "gpt", "value": "The answer is Build relationships, solicit participation, target large businesses, shift focus" } ], "clip_score": 20.640625, "loss": 3.7810325622558594 }, { "id": "identity_192960", "image": "PMC-VQA/images/PMC6861597_fig0002_70162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of atrophy in this brain image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.671875, "loss": 2.8327388763427734 }, { "id": "identity_205181", "image": "TextVQA/images/69bb92e238e6fa80.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are standing on the platform to the right?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.71875, "loss": 4.491930961608887 }, { "id": "identity_18899", "image": "TextVQA/images/06a7f1d5b80884dd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what's the name of the shop?" }, { "from": "gpt", "value": "The answer is boucheries roger" } ], "clip_score": 31.53125, "loss": 2.9451825618743896 }, { "id": "identity_28527", "image": "DVQA/images/bar_train_00124064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 20.03125, "loss": 3.524181365966797 }, { "id": "identity_239493", "image": "ScienceQA/images/12389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and list its three largest cities by population" }, { "from": "gpt", "value": "The answer is Havana, Santiago de Cuba, Camagüey" } ], "clip_score": 31.515625, "loss": 1.837081789970398 }, { "id": "identity_189965", "image": "PMC-VQA/images/PMC5725823_Fig1_251724.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain in the given MR image?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 30.15625, "loss": 3.6172244548797607 }, { "id": "identity_30934", "image": "VQA-AS/images/6897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the girl on the left happy?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.9375, "loss": 3.4498000144958496 }, { "id": "identity_258299", "image": "PlotQA/images/83638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of the population that has access to non-solid fuel in rural and urban areas in 2010 and 2012?" }, { "from": "gpt", "value": "The answer is 87.5%" } ], "clip_score": 27.34375, "loss": 1.8855974674224854 }, { "id": "identity_124171", "image": "VQA2.0/images/581065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the cake resembling?" }, { "from": "gpt", "value": "The answer is The American flag" } ], "clip_score": 27.03125, "loss": 3.316307783126831 }, { "id": "identity_167394", "image": "TabMWP/images/11348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students in total play instruments in the band?" }, { "from": "gpt", "value": "The answer is 91" } ], "clip_score": 34.71875, "loss": 2.5740602016448975 }, { "id": "identity_173367", "image": "TabMWP/images/11455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the hammock and the wooden stool?" }, { "from": "gpt", "value": "The answer is $54" } ], "clip_score": 34.21875, "loss": 2.9626543521881104 }, { "id": "identity_23834", "image": "DVQA/images/bar_train_00152446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from the bottom?" }, { "from": "gpt", "value": "The answer is muscle" } ], "clip_score": 23.71875, "loss": 3.3957135677337646 }, { "id": "identity_5852", "image": "FigureQA/images/48399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.78125, "loss": 4.187776565551758 }, { "id": "identity_61683", "image": "DocVQA/images/yncd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Chairman of the Committee on Health Care of the Poor?" }, { "from": "gpt", "value": "The answer is Robert C. Long, M.D" } ], "clip_score": 22.796875, "loss": 2.605708122253418 }, { "id": "identity_125626", "image": "PMC-VQA/images/PMC7849915_FIG6_357283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the white arrow?" }, { "from": "gpt", "value": "The answer is The white arrow points to the stomach/intestines" } ], "clip_score": 24.828125, "loss": 2.497208833694458 }, { "id": "identity_308165", "image": "UniGeo/images/2748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given the diagram, \\(DC \\parallel AB\\), \\(\\angle BAE = \\angle BCD\\), \\(AE \\perp DE\\), and \\(\\angle C = 130^\\circ\\), what is the measure of \\(\\angle D\\)?\n\nChoices:\n(A) 160°\n(B) 140°\n(C) 130°\n(D) 125°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 2.359109401702881 }, { "id": "identity_83887", "image": "GeoQA+/images/4192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of AD is 10 units, what is the length of PF?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 24.234375, "loss": 2.201366901397705 }, { "id": "identity_194555", "image": "PMC-VQA/images/PMC6445604_F4_460291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the arrow pointing to the area of interest?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.6328125, "loss": 3.8235461711883545 }, { "id": "identity_29206", "image": "IconQA/images/8112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) triangle\n(B) square\n(C) circle" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0625, "loss": 4.162391662597656 }, { "id": "identity_148141", "image": "DocVQA/images/nsnm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of The Nutrition Foundation?" }, { "from": "gpt", "value": "The answer is 489 Fifth Avenue, New York, New York 10016" } ], "clip_score": 19.5, "loss": 1.3306809663772583 }, { "id": "identity_175126", "image": "TabMWP/images/20965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you start at sea level and descend to the underground lake, how many feet have you descended?" }, { "from": "gpt", "value": "The answer is 350 feet" } ], "clip_score": 29.390625, "loss": 1.2155975103378296 }, { "id": "identity_161134", "image": "UniGeo/images/1187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 31.609375, "loss": 2.3037774562835693 }, { "id": "identity_159726", "image": "GeoQA+/images/5916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the side length of square N is decreased by 1 cm, what will be the new area of square N in cm^2?" }, { "from": "gpt", "value": "The answer is 16 cm^2" } ], "clip_score": 34.1875, "loss": 1.6932334899902344 }, { "id": "identity_203671", "image": "PMC-VQA/images/PMC3446384_F6_156024.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of cell differentiation or specialization in this image?" }, { "from": "gpt", "value": "The answer is No clear indication of differentiation or specialization" } ], "clip_score": 26.828125, "loss": 2.680671453475952 }, { "id": "identity_174436", "image": "IconQA/images/92743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color on the tennis racket's grip?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 34.34375, "loss": 2.5591115951538086 }, { "id": "identity_219579", "image": "VQA2.0/images/377756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a no parking sign visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.640625, "loss": 3.2089924812316895 }, { "id": "identity_84585", "image": "UniGeo/images/3454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle OEC in the circle O?" }, { "from": "gpt", "value": "The answer is 3√3 cm²" } ], "clip_score": 34.84375, "loss": 2.383246898651123 }, { "id": "identity_122776", "image": "A-OKVQA/images/408680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person on the snowboard wearing a helmet?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.46875, "loss": 3.2608211040496826 }, { "id": "identity_275103", "image": "FigureQA/images/20081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orchid not the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.90625, "loss": 3.7202656269073486 }, { "id": "identity_283290", "image": "ScienceQA/images/15364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly identifies the direction of thermal energy transfer during this time: from the surroundings to each salmon or from each salmon to the surroundings?\nChoices:\n(A) the surroundings . . . each salmon\n(B) each salmon . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.9375, "loss": 4.011876583099365 }, { "id": "identity_200743", "image": "A-OKVQA/images/392851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes of traffic are visible in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 12.7265625, "loss": 2.6627204418182373 }, { "id": "identity_183562", "image": "PMC-VQA/images/PMC6248249_f1_399487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the heart rate shown on the ultrasound image?" }, { "from": "gpt", "value": "The answer is 83 bpm" } ], "clip_score": 35.84375, "loss": 2.1027135848999023 }, { "id": "identity_197864", "image": "VQA2.0/images/147538.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella the person is holding?" }, { "from": "gpt", "value": "The answer is Pink and white" } ], "clip_score": 20.9375, "loss": 3.371361255645752 }, { "id": "identity_265958", "image": "AI2D/images/3638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which blood vessel supplies blood to the spleen? \n(A) descending colon\n(B) hepatic blood vessels\n(C) splenic artery\n(D) renal blood vessels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.25, "loss": 3.964531421661377 }, { "id": "identity_118738", "image": "VQA2.0/images/216278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the bus?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 29.015625, "loss": 3.615142822265625 }, { "id": "identity_116867", "image": "A-OKVQA/images/194744.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What two words are visible on the pizza box?" }, { "from": "gpt", "value": "The answer is AUTHENTIC RECIPES" } ], "clip_score": 23.84375, "loss": 1.5507515668869019 }, { "id": "identity_292987", "image": "TQA/images/question_images/hair_follicles_6989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the structure that secretes oil among the following options: (A) Sebaceous gland, (B) Bulge, (C) Arrector pili muscle, (D) Hair bulb?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.96875, "loss": 3.952843427658081 }, { "id": "identity_281337", "image": "TabMWP/images/34783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money did Rick spend at the store when he purchased 3 kilograms of Manila scrap and 5 kilograms of vellum scrap?" }, { "from": "gpt", "value": "The answer is 8.71" } ], "clip_score": 31.625, "loss": 2.5038185119628906 }, { "id": "identity_72139", "image": "ScienceQA/images/6302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate latitude and longitude of the center of the coldest area within the outlined region?" }, { "from": "gpt", "value": "The answer is Approximately 60°S, 90°W" } ], "clip_score": 23.375, "loss": 1.7773804664611816 }, { "id": "identity_242177", "image": "TextVQA/images/5bac2a18581539f3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what languages are used on this stop sign?" }, { "from": "gpt", "value": "The answer is English and French" } ], "clip_score": 31.25, "loss": 2.0911643505096436 }, { "id": "identity_172186", "image": "IconQA/images/47983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is spun twice, what is the probability that it will land on green both times?" }, { "from": "gpt", "value": "The answer is 9/16" } ], "clip_score": 30.578125, "loss": 2.265841245651245 }, { "id": "identity_29379", "image": "DVQA/images/bar_train_00186937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.0625, "loss": 3.733396053314209 }, { "id": "identity_311819", "image": "PlotQA/images/153246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the difference between the price value of exports in these two countries greater than the difference between any other two countries?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.828125, "loss": 2.8787589073181152 }, { "id": "identity_76293", "image": "AI2D/images/1070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which structure in the diagram is responsible for producing ribosomes?" }, { "from": "gpt", "value": "The answer is Nucleolus" } ], "clip_score": 25.609375, "loss": 1.457601547241211 }, { "id": "identity_53482", "image": "FigureQA/images/95682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the highest point for Medium Periwinkle?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 21.359375, "loss": 2.069725275039673 }, { "id": "identity_172711", "image": "IconQA/images/40235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 2 diamonds, how many diamonds will be left?" }, { "from": "gpt", "value": "The answer is 3 diamonds" } ], "clip_score": 30.015625, "loss": 1.9521104097366333 }, { "id": "identity_42322", "image": "FigureQA/images/69506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Dark Olive?" }, { "from": "gpt", "value": "The answer is 7 (from 87 to 94)" } ], "clip_score": 25.90625, "loss": 2.3222756385803223 }, { "id": "identity_98228", "image": "IconQA/images/14319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many houses have a yellow exterior?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.109375, "loss": 2.3838751316070557 }, { "id": "identity_118932", "image": "Super-CLEVR/images/superCLEVR_new_000719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly behind the green motorcycle?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 29.65625, "loss": 3.3507306575775146 }, { "id": "identity_336749", "image": "TabMWP/images/12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A city planner counted the number of floors per building in the downtown area. How many buildings have at least some floors but fewer than some other floors?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 24.671875, "loss": 2.1814815998077393 }, { "id": "identity_124413", "image": "PMC-VQA/images/PMC5557509_Fig1_203150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the yellow line indicate in the image?" }, { "from": "gpt", "value": "The answer is Measurement of wall thickness" } ], "clip_score": 22.953125, "loss": 3.3403496742248535 }, { "id": "identity_75790", "image": "TQA/images/question_images/human_system_reproductory_7043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Fimbria\"?" }, { "from": "gpt", "value": "The answer is To guide the egg into the fallopian tube" } ], "clip_score": 31.65625, "loss": 1.9299770593643188 }, { "id": "identity_244873", "image": "ScienceQA/images/7035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total thermal energy lost by both cakes if the specific heat capacity of the cake is 2.5 J/g°C and the mass of each cake is 500 grams" }, { "from": "gpt", "value": "The answer is 28750 J" } ], "clip_score": 28.3125, "loss": 2.1647276878356934 }, { "id": "identity_143800", "image": "DocVQA/images/ghgw0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is agitation required for the storage of this product?" }, { "from": "gpt", "value": "The answer is NO" } ], "clip_score": 18.0625, "loss": 3.648815155029297 }, { "id": "identity_94481", "image": "TabMWP/images/27772.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two more students join the Graduation committee, how many students will be on that committee?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 28.375, "loss": 2.14341139793396 }, { "id": "identity_204571", "image": "A-OKVQA/images/457230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the horses equipped with any facial gear?" }, { "from": "gpt", "value": "The answer is Yes, bridles" } ], "clip_score": 27.34375, "loss": 3.5129926204681396 }, { "id": "identity_102454", "image": "CLEVR-Math/images/CLEVR_train_036539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of metallic objects to non-metallic objects?" }, { "from": "gpt", "value": "The answer is 2:5" } ], "clip_score": 24.828125, "loss": 2.0575573444366455 }, { "id": "identity_200594", "image": "VQA2.0/images/392408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sheep are standing on the elevated platform?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 29.609375, "loss": 2.3255512714385986 }, { "id": "identity_329218", "image": "GeoQA+/images/1203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,点D、E、F分别是各边的中点,若△ABC的面积为16cm2,则△DEF的面积是多少?\n选择:\n(A) 2\n(B) 4\n(C) 6\n(D) 8" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.90625, "loss": 2.3905816078186035 }, { "id": "identity_325050", "image": "PMC-VQA/images/PMC3178647_pone-0025316-g002_109634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the highlighted area of (A)?\nChoices:\n(A) Epon without antibody labeling\n(B) The purpose of Pex10-GFP in P.tricornutum\n(C) Dividing P. tricornutum\n(D) Immuno labeling of Pex10-GFP" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.953125, "loss": 4.740991115570068 }, { "id": "identity_315182", "image": "Geometry3K/images/1536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle C.\nChoices:\n(A) 38\n(B) 66\n(C) 72\n(D) 128" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.046875, "loss": 2.6034626960754395 }, { "id": "identity_214729", "image": "VQA2.0/images/369735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fruit is visible in the sandwich?" }, { "from": "gpt", "value": "The answer is Banana" } ], "clip_score": 22.140625, "loss": 2.293513059616089 }, { "id": "identity_16994", "image": "VQA2.0/images/129753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why does the girl have her eyes closed?" }, { "from": "gpt", "value": "The answer is picture snapped too soon" } ], "clip_score": 21.359375, "loss": 4.483832836151123 }, { "id": "identity_276430", "image": "TQA/images/question_images/optics_ray_diagrams_9165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the picture, how many different colors of arrows are displayed?\nChoices:\n(A) 3\n(B) 1\n(C) 4\n(D) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.421875, "loss": 2.774949550628662 }, { "id": "identity_334411", "image": "PMC-VQA/images/PMC5337383_fig1_134483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging finding in this?\n\nChoices:\n(A) A small mass with no impact on surrounding organs.\n(B) A large mass that has infiltrated surrounding vital organs.\n(C) A large mass with multiple areas of low attenuation without impacting vital organs.\n(D) A large mass with multiple areas of low attenuation that has invaded the heart and other vital organs." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.6875, "loss": 3.7291953563690186 }, { "id": "identity_98665", "image": "IconQA/images/65245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you color one more square yellow, what fraction of the squares will be yellow?" }, { "from": "gpt", "value": "The answer is 3/5 or 0.6" } ], "clip_score": 35.34375, "loss": 1.455269455909729 }, { "id": "identity_114876", "image": "VQA2.0/images/206427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs indicating organic produce at the stall?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.46875, "loss": 2.740872621536255 }, { "id": "identity_290936", "image": "UniGeo/images/2637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, in quadrilateral ABCD, the angle bisectors of ∠ABC and ∠BCD intersect side AD at point E, with BE = 12 and CE = 5. What is the distance from point A to BC?\nChoices:\n(A) \\frac{12}{5}\n(B) 4\n(C) \\frac{60}{13}\n(D) \\frac{60}{7}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.078125, "loss": 2.320362091064453 }, { "id": "identity_185489", "image": "Super-CLEVR/images/superCLEVR_new_000829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two wheels?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.578125, "loss": 2.349313735961914 }, { "id": "identity_155464", "image": "ScienceQA/images/6974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the body of water to the east of the highlighted country?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 25.953125, "loss": 2.5527937412261963 }, { "id": "identity_271926", "image": "GeoQA+/images/5181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A and D are on circle O, with BC as the diameter. If ∠ADC = 33°, what is the measure of ∠ACB? \n(A) 57°\n(B) 66°\n(C) 67°\n(D) 44°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.8125, "loss": 2.2790684700012207 }, { "id": "identity_158579", "image": "ScienceQA/images/12946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is Two magnets" } ], "clip_score": 20.921875, "loss": 3.6070239543914795 }, { "id": "identity_42312", "image": "PlotQA/images/155596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average price level ratio over the four years shown?" }, { "from": "gpt", "value": "The answer is Approximately 0.3" } ], "clip_score": 26.828125, "loss": 2.324730157852173 }, { "id": "identity_166962", "image": "TabMWP/images/37746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the number of lamps in each office is doubled, how many lamps will be in 4 offices?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 38.875, "loss": 2.1584701538085938 }, { "id": "identity_86534", "image": "UniGeo/images/486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AB=AC=2 and ∠DEA is 30°, what is the measure of ∠AED?" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 30.671875, "loss": 1.464872121810913 }, { "id": "identity_105449", "image": "VQA2.0/images/262937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number visible on the front of the left bus?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.5, "loss": 2.5057733058929443 }, { "id": "identity_45246", "image": "DVQA/images/bar_train_00079404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of people preferring the object crew between the two categories?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.921875, "loss": 2.4610073566436768 }, { "id": "identity_332024", "image": "A-OKVQA/images/533888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What would someone avoid if they preferred only cooked food?\nChoices:\n(A) yakitori\n(B) rice\n(C) shrimp tempura\n(D) sushi" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.515625, "loss": 3.4263579845428467 }, { "id": "identity_275134", "image": "TQA/images/abc_question_images/seasons_10164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which month does summer occur in the northern hemisphere?\nChoices:\n(A) C\n(B) M\n(C) Y\n(D) L" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.6875, "loss": 3.740023136138916 }, { "id": "identity_192046", "image": "PMC-VQA/images/PMC2100056_F1_15102.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of bowel obstruction in this CT scan?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 30.25, "loss": 2.686410665512085 }, { "id": "identity_103944", "image": "Super-CLEVR/images/superCLEVR_new_001482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more bicycles or helicopters in the image?" }, { "from": "gpt", "value": "The answer is More bicycles" } ], "clip_score": 26.1875, "loss": 2.188800811767578 }, { "id": "identity_176549", "image": "TabMWP/images/5199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the cereal boxes are mini size with two prizes?" }, { "from": "gpt", "value": "The answer is 3/16" } ], "clip_score": 29.765625, "loss": 1.983259677886963 }, { "id": "identity_99860", "image": "IconQA/images/62026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides for all the rectangles combined?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.234375, "loss": 1.7573291063308716 }, { "id": "identity_266582", "image": "VQA2.0/images/21152.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the man located, on the sand or in the water?" }, { "from": "gpt", "value": "The answer is in water" } ], "clip_score": 22.734375, "loss": 3.1467907428741455 }, { "id": "identity_94768", "image": "TabMWP/images/36917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the price decreases from $670 to $650, by how much does the quantity demanded increase?" }, { "from": "gpt", "value": "The answer is 2,700" } ], "clip_score": 32.5, "loss": 2.062800168991089 }, { "id": "identity_287481", "image": "IconQA/images/8855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does the dotted line serve as a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.5, "loss": 3.8908519744873047 }, { "id": "identity_281412", "image": "PMC-VQA/images/PMC7249926_F1_178250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the MRI scan, at which location is the disc herniation found?\nChoices:\n(A) L3/4\n(B) L4/5\n(C) L5/S1\n(D) None of the above." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.25, "loss": 4.124741077423096 }, { "id": "identity_209269", "image": "PMC-VQA/images/PMC6855517_F1_68279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the body is marked with an 'R'?" }, { "from": "gpt", "value": "The answer is The patient's right side is marked with an 'R'" } ], "clip_score": 18.078125, "loss": 1.9812566041946411 }, { "id": "identity_40276", "image": "FigureQA/images/82752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of x-axis values for Light Salmon?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 23.421875, "loss": 2.34183406829834 }, { "id": "identity_280900", "image": "Geometry3K/images/92.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If m is the perpendicular bisector of XZ, and WZ is equal to 4a - 15 and WZ is equal to a + 12, what is the value of WX? \n(A) 12\n(B) 15\n(C) 21\n(D) 22.4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.0625, "loss": 2.4514029026031494 }, { "id": "identity_53878", "image": "FigureQA/images/83769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the lowest data point for Dark Cyan?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 25.4375, "loss": 2.197624683380127 }, { "id": "identity_52688", "image": "FigureQA/images/4357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Gray have the largest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 36.125, "loss": 3.0829761028289795 }, { "id": "identity_59912", "image": "ChartQA/images/multi_col_40227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which reason has the closest percentage of \"Yes, absolutely\" and \"Yes, somewhat\" responses?" }, { "from": "gpt", "value": "The answer is You have difficulties finding books that you like" } ], "clip_score": 27.515625, "loss": 2.3774423599243164 }, { "id": "identity_3712", "image": "FigureQA/images/19909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.78125, "loss": 4.71815824508667 }, { "id": "identity_7632", "image": "VQA2.0/images/31561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Would a person be able to use the computer like it is?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.984375, "loss": 3.0882675647735596 }, { "id": "identity_163601", "image": "GeoQA+/images/5001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If DE is the perpendicular bisector of AB in triangle ABC, AC = 9, and BC = 5, what is the measure of angle AEB?" }, { "from": "gpt", "value": "The answer is 180" } ], "clip_score": 34.40625, "loss": 2.3884189128875732 }, { "id": "identity_296355", "image": "DocVQA/images/ghgw0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide information on the type of material GL?" }, { "from": "gpt", "value": "The answer is GLYCERINE" } ], "clip_score": 20.6875, "loss": 2.8045411109924316 }, { "id": "identity_184036", "image": "A-OKVQA/images/387857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows are there in the room?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 19.0625, "loss": 2.508805513381958 }, { "id": "identity_83131", "image": "Geometry3K/images/1594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of angle 5 if angle 2 is 63 degrees and angle 4 is 180 degrees" }, { "from": "gpt", "value": "The answer is 133 degrees" } ], "clip_score": 33.1875, "loss": 2.0327649116516113 }, { "id": "identity_287848", "image": "AI2D/images/4425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify which of the following options represents ash:\n(A) D\n(B) C\n(C) B\n(D) A" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.265625, "loss": 3.7286486625671387 }, { "id": "identity_335247", "image": "A-OKVQA/images/350552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the animal in the town center?\nChoices:\n(A) its shopping\n(B) its exercising\n(C) its working\n(D) its eating" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.734375, "loss": 4.23073673248291 }, { "id": "identity_247150", "image": "GeoQA+/images/3391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB为⊙O的直径,已知∠ACD=20°,求∠BCD的度数。" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.8125, "loss": 1.9082093238830566 }, { "id": "identity_86351", "image": "UniGeo/images/777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if angle C is a right angle, AC=4, and BC=3, what is the length of AB?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.25, "loss": 2.0995371341705322 }, { "id": "identity_192961", "image": "PMC-VQA/images/PMC6861597_fig0002_70162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this image show the cerebellum?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.859375, "loss": 3.179988145828247 }, { "id": "identity_192842", "image": "PMC-VQA/images/PMC8130232_acm213246-fig-0001_444310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date and time when this medical image was captured?" }, { "from": "gpt", "value": "The answer is May 28, 2010, at 19:45 AM" } ], "clip_score": 26.65625, "loss": 1.7567427158355713 }, { "id": "identity_139624", "image": "FigureQA/images/39638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Aqua have a larger portion than Medium Aqua?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.6875, "loss": 3.231039047241211 }, { "id": "identity_102414", "image": "IconQA/images/17874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of bears to trains?" }, { "from": "gpt", "value": "The answer is 5:4" } ], "clip_score": 30.359375, "loss": 1.7825210094451904 }, { "id": "identity_286563", "image": "A-OKVQA/images/248250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is the likely source of the unpleasant smell in the market area? \n(A) limburger cheese\n(B) cow\n(C) manure\n(D) dead fish" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.5390625, "loss": 4.4985151290893555 }, { "id": "identity_100297", "image": "IconQA/images/35081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the marbles are black?" }, { "from": "gpt", "value": "The answer is 2/3" } ], "clip_score": 29.4375, "loss": 1.421431303024292 }, { "id": "identity_320684", "image": "VQA2.0/images/490112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are on the scooter?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 16.1875, "loss": 2.4091570377349854 }, { "id": "identity_119742", "image": "TextVQA/images/9f3f51db8c3de90d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the traffic light facing the camera?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.34375, "loss": 3.1686434745788574 }, { "id": "identity_91021", "image": "Geometry3K/images/1377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the base of the larger triangle is 20 units, what is the base of the smaller triangle?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.921875, "loss": 3.081463098526001 }, { "id": "identity_19210", "image": "GeoQA+/images/2092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC中,AE交BC于点D,∠C=∠CBE,AD:DE=3:5,BD=4,则DC的长等于()\nChoices:\n(A) \\frac{15}{4}\n(B) \\frac{12}{5}\n(C) \\frac{20}{3}\n(D) \\frac{17}{4}" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.84375, "loss": 2.624910831451416 }, { "id": "identity_25971", "image": "TextVQA/images/f98670f6cba969ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the jersey number of the baseball player facing the crowd?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 29.75, "loss": 1.6295675039291382 }, { "id": "identity_300488", "image": "ScienceQA/images/2915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system accurate or inaccurate?\nHalf of the planets are primarily composed of gas.\nChoices:\n(A) accurate\n(B) inaccurate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.5, "loss": 4.134037017822266 }, { "id": "identity_219177", "image": "PMC-VQA/images/PMC9369558_jcm-11-04556-f001_369917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is the dark area in the center of the right image?" }, { "from": "gpt", "value": "The answer is The dark area is the fourth ventricle" } ], "clip_score": 23.28125, "loss": 2.839496374130249 }, { "id": "identity_209059", "image": "A-OKVQA/images/472828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any beverages visible on the vendor's cart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.1875, "loss": 3.189411163330078 }, { "id": "identity_279890", "image": "AI2D/images/4777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the plant structure that consists of verticils arranged in a spike:\n(A) thyrse\n(B) catkin\n(C) raceme\n(D) verticilaster" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.90625, "loss": 3.6717758178710938 }, { "id": "identity_130137", "image": "A-OKVQA/images/268306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.015625, "loss": 2.5869691371917725 }, { "id": "identity_60091", "image": "DocVQA/images/rncj0037_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the \"K. C. CIGARETTES\" category contain a product with a cigarette code \"11/5SE\" under \"RIP-6 w/Filter\"?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.953125, "loss": 3.026784896850586 }, { "id": "identity_73302", "image": "ScienceQA/images/5715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the body of water located to the south of the highlighted country?" }, { "from": "gpt", "value": "The answer is Caribbean Sea" } ], "clip_score": 31.90625, "loss": 1.9900214672088623 }, { "id": "identity_40444", "image": "DVQA/images/bar_train_00140328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the highest sales for 'cell'?" }, { "from": "gpt", "value": "The answer is Stage" } ], "clip_score": 26.703125, "loss": 3.212754011154175 }, { "id": "identity_168417", "image": "TabMWP/images/14784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more smoothies were sold on Saturday compared to Sunday?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 37.53125, "loss": 2.5132455825805664 }, { "id": "identity_284574", "image": "TextVQA/images/5f0c8fe763efeb62.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What breed of dog is it?" }, { "from": "gpt", "value": "The answer is arson" } ], "clip_score": 23.03125, "loss": 3.1477625370025635 }, { "id": "identity_188232", "image": "PMC-VQA/images/PMC4285068_F2_348751.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the sulci and gyri of the cerebral cortex well-defined in this image?" }, { "from": "gpt", "value": "The answer is Yes, the sulci and gyri are well-defined" } ], "clip_score": 34.96875, "loss": 1.555277943611145 }, { "id": "identity_5598", "image": "GeoQA+/images/2813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,∠C+∠O=90°,AB=3,则劣弧AB的长度是()\nChoices:\n(A) π\n(B) 3π\n(C) 5π\n(D) 15π" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.03125, "loss": 2.6457877159118652 }, { "id": "identity_304235", "image": "Geometry3K/images/1178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that \\(\\triangle R S T \\cong \\triangle V U T\\), what is the area of \\(\\triangle R S T\\)?\nChoices:\n(A) 150\n(B) 300\n(C) 450\n(D) 600" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.28125, "loss": 2.487663984298706 }, { "id": "identity_55867", "image": "DVQA/images/bar_train_00091417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total accuracy of the 'taste' algorithm across all datasets?" }, { "from": "gpt", "value": "The answer is 58 (by estimating the values from the graph)" } ], "clip_score": 30.3125, "loss": 2.672834634780884 }, { "id": "identity_302707", "image": "DVQA/images/bar_train_00095258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are liked by more than 5 individuals in at least one category?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 28.234375, "loss": 3.489401340484619 }, { "id": "identity_281173", "image": "Super-CLEVR/images/superCLEVR_new_001937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the colors of the small rubber minivan and the small rubber bus the same?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.78125, "loss": 5.4649553298950195 }, { "id": "identity_64847", "image": "AI2D/images/272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of energy for this food web?" }, { "from": "gpt", "value": "The answer is Sun" } ], "clip_score": 34.15625, "loss": 3.4419491291046143 }, { "id": "identity_102162", "image": "TabMWP/images/17269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of vehicles sold by the four dealerships?" }, { "from": "gpt", "value": "The answer is 668" } ], "clip_score": 33.5625, "loss": 1.8358548879623413 }, { "id": "identity_128832", "image": "VQA-RAD/images/synpic59536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of brain scan is shown in the image?" }, { "from": "gpt", "value": "The answer is MRI brain scan" } ], "clip_score": 31.84375, "loss": 2.74933123588562 }, { "id": "identity_43688", "image": "DVQA/images/bar_train_00141940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold in the 'ocean' store across all items?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 26.859375, "loss": 2.3368523120880127 }, { "id": "identity_177332", "image": "TabMWP/images/27256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of tractors made in the first quarter of the year (January to March)?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 36.84375, "loss": 2.2673001289367676 }, { "id": "identity_68180", "image": "AI2D/images/274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 34.6875, "loss": 4.197833061218262 }, { "id": "identity_50045", "image": "DVQA/images/bar_train_00080022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'drink' and 'jet' algorithms on the 'code' dataset?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 37.3125, "loss": 3.0648365020751953 }, { "id": "identity_204921", "image": "TextVQA/images/caeef917566e0387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the audience in the stand opposite the scoreboard?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 24.25, "loss": 3.288714647293091 }, { "id": "identity_82666", "image": "UniGeo/images/2507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the chord AC subtends a central angle of 80°, what is the degree measure of the angle formed by the tangent at point A and the chord AC?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 33.21875, "loss": 1.6770405769348145 }, { "id": "identity_156333", "image": "TQA/images/question_images/parts_microscope_7191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the part labeled as \"MECHANICAL STAGE\" used for?" }, { "from": "gpt", "value": "The answer is It holds and moves the slide for viewing" } ], "clip_score": 27.40625, "loss": 2.9502079486846924 }, { "id": "identity_283905", "image": "VizWiz/images/VizWiz_train_00007526.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you confirm if this package contains pineapple chunks or cantaloupe chunks?" }, { "from": "gpt", "value": "The answer is pineapple" } ], "clip_score": 34.25, "loss": 2.655147075653076 }, { "id": "identity_132612", "image": "PMC-VQA/images/PMC4887635_fig2_36072.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is labeled as \"CF\"?" }, { "from": "gpt", "value": "The answer is Connective fascia or similar structure" } ], "clip_score": 26.265625, "loss": 4.07098388671875 }, { "id": "identity_55246", "image": "DocVQA/images/pmnx0078_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the offer mentioned in the 'Buy 1 pack' coupon?" }, { "from": "gpt", "value": "The answer is Buy 1 pack, get 1 free" } ], "clip_score": 23.34375, "loss": 1.65810227394104 }, { "id": "identity_118240", "image": "A-OKVQA/images/58225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Afternoon" } ], "clip_score": 7.36328125, "loss": 3.1211822032928467 }, { "id": "identity_35670", "image": "PMC-VQA/images/PMC5496392_Fig4_186289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the V sign indicate on lateral radiographs? \nChoices:\n(A) position of the spine \n(B) location of a fracture \n(C) alignment of the hips \n(D) angle of the knee " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.125, "loss": 4.5652174949646 }, { "id": "identity_6094", "image": "A-OKVQA/images/581458.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type train is shown here?\nChoices:\n(A) oil\n(B) coal\n(C) solar\n(D) elevated" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.9375, "loss": 4.0921735763549805 }, { "id": "identity_213890", "image": "PMC-VQA/images/PMC2892667_fig2_67547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we observe any cardiac enlargement in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.78125, "loss": 2.7163772583007812 }, { "id": "identity_125752", "image": "A-OKVQA/images/42819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final destination of the train according to the information board?" }, { "from": "gpt", "value": "The answer is Kastrup København" } ], "clip_score": 27.671875, "loss": 2.7261273860931396 }, { "id": "identity_274475", "image": "VQA2.0/images/478005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it possible for me to purchase food at this location?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.546875, "loss": 2.9557647705078125 }, { "id": "identity_150940", "image": "ScienceQA/images/16329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the peach in the image appear to have a smooth or rough texture?" }, { "from": "gpt", "value": "The answer is Smooth" } ], "clip_score": 34.71875, "loss": 2.68796443939209 }, { "id": "identity_125473", "image": "TextVQA/images/ff0ef7dc6d86e204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What network is broadcasting the event as per the board?" }, { "from": "gpt", "value": "The answer is ESPN" } ], "clip_score": 17.3125, "loss": 2.3772456645965576 }, { "id": "identity_208164", "image": "PMC-VQA/images/PMC7328922_F2_202997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is shown in this image?" }, { "from": "gpt", "value": "The answer is Pelvic region" } ], "clip_score": 28.796875, "loss": 2.9271435737609863 }, { "id": "identity_33169", "image": "Geometry3K/images/1160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 5\n(B) 10\n(C) 20\n(D) 100" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.53125, "loss": 3.235933780670166 }, { "id": "identity_262572", "image": "PMC-VQA/images/PMC9372450_f1_371374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During the study periods, which imaging assessments were conducted?\nChoices:\n(A) PET scan and ultrasound\n(B) MRI and CT scan\n(C) X-ray and mammogram\n(D) EEG and ECG" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.578125, "loss": 3.6767959594726562 }, { "id": "identity_273178", "image": "Super-CLEVR/images/superCLEVR_new_000216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any small shiny objects to the right of the small shiny bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 23.390625, "loss": 5.481354713439941 }, { "id": "identity_157107", "image": "ScienceQA/images/5944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the large tree play in this ecosystem?" }, { "from": "gpt", "value": "The answer is Keystone species" } ], "clip_score": 21.9375, "loss": 3.528531789779663 }, { "id": "identity_264925", "image": "PlotQA/images/148270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there grids present in the graph?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.25, "loss": 3.2248401641845703 }, { "id": "identity_55952", "image": "DocVQA/images/hnfw0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount claimed in the loss and damage claim?" }, { "from": "gpt", "value": "The answer is 2031.48" } ], "clip_score": 19.9375, "loss": 2.170164108276367 }, { "id": "identity_285908", "image": "GeoQA+/images/3524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, if l1 is parallel to l2, angle A is 45°, and angle 2 is 100°, what is the degree measure of angle 1?\nChoices:\n(A) 50°\n(B) 55°\n(C) 45°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.15625, "loss": 2.9519741535186768 }, { "id": "identity_121771", "image": "A-OKVQA/images/84341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What trick is the skateboarder performing?" }, { "from": "gpt", "value": "The answer is Nose grab" } ], "clip_score": 27.453125, "loss": 4.0206732749938965 }, { "id": "identity_283469", "image": "PMC-VQA/images/PMC7010856_F1_114139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the images located in the middle row?\nChoices:\n(A) The extent of hematoma and edema \n(B) The vasculature of the brain \n(C) The ICH lesion in the striatum \n(D) The corresponding fractal analysis maps" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.90625, "loss": 3.7486839294433594 }, { "id": "identity_299970", "image": "TextVQA/images/72061ebefb7a25db.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the sports team?" }, { "from": "gpt", "value": "The answer is abro" } ], "clip_score": 15.3359375, "loss": 5.217254161834717 }, { "id": "identity_936", "image": "TabMWP/images/31289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Harry went to the store. He bought 2/5 of a kilogram of American cheese. How much did he spend?" }, { "from": "gpt", "value": "The answer is 1.20" } ], "clip_score": 29.953125, "loss": 2.4045703411102295 }, { "id": "identity_200163", "image": "A-OKVQA/images/315196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.796875, "loss": 3.100783586502075 }, { "id": "identity_104301", "image": "VQA2.0/images/393503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What animal is inside the truck?" }, { "from": "gpt", "value": "The answer is Cow" } ], "clip_score": 28.5625, "loss": 2.841409921646118 }, { "id": "identity_11849", "image": "DocVQA/images/jpch0224_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the 12 digit Marketing Card Number?" }, { "from": "gpt", "value": "The answer is 21-203-000-3669" } ], "clip_score": 17.5, "loss": 2.3724863529205322 }, { "id": "identity_244015", "image": "TabMWP/images/8289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the train departs from the shopping district at 11:30 A.M., how many minutes does it take to reach the art gallery district?" }, { "from": "gpt", "value": "The answer is 120 minutes" } ], "clip_score": 35.5, "loss": 1.9778790473937988 }, { "id": "identity_333667", "image": "PMC-VQA/images/PMC6274901_ijms-19-03675-f001_406375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the angiograms of the common carotid arteries on both sides?\nChoices:\n(A) Calcification\n(B) Enlargement of lenticulostriate arteries\n(C) Hemorrhage\n(D) Normal arterial flow" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.34375, "loss": 4.838379383087158 }, { "id": "identity_21113", "image": "A-OKVQA/images/79544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the woman with the baby involved in?\nChoices:\n(A) burping\n(B) banking\n(C) travelling\n(D) marketing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.5, "loss": 4.517343521118164 }, { "id": "identity_37694", "image": "ChartQA/images/2432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of blue bar and green bar in Germany?" }, { "from": "gpt", "value": "The answer is 0.042361111" } ], "clip_score": 18.03125, "loss": 2.3712189197540283 }, { "id": "identity_271846", "image": "FigureQA/images/20955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.21875, "loss": 3.992640972137451 }, { "id": "identity_176124", "image": "IconQA/images/92990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock is 30 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 5:30" } ], "clip_score": 30.65625, "loss": 1.5215497016906738 }, { "id": "identity_248581", "image": "Geometry3K/images/409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If \\( P R \\parallel W X \\), \\( W X = 10 \\), \\( X Y = 6 \\), \\( W Y = 8 \\), \\( R Y = 5 \\), and \\( P S = 3 \\), find the area of quadrilateral \\( P R Y S \\)" }, { "from": "gpt", "value": "The answer is 11.55" } ], "clip_score": 25.21875, "loss": 2.3877580165863037 }, { "id": "identity_328304", "image": "A-OKVQA/images/67115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do these animals have in the center of their heads?\nChoices:\n(A) sting\n(B) single horn\n(C) ossicones\n(D) cornet" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.515625, "loss": 4.258548736572266 }, { "id": "identity_17558", "image": "FigureQA/images/81437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.2890625, "loss": 4.223367214202881 }, { "id": "identity_121806", "image": "PMC-VQA/images/PMC9314236_FIG1_350301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any midline shift observed in the image?" }, { "from": "gpt", "value": "The answer is No, there is no midline shift observed" } ], "clip_score": 19.78125, "loss": 1.7430999279022217 }, { "id": "identity_51590", "image": "DVQA/images/bar_train_00122254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the highest bar in the \"left\" category?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 21.109375, "loss": 2.707321882247925 }, { "id": "identity_193798", "image": "PMC-VQA/images/PMC7034755_FIG1_122260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the patient in this image?" }, { "from": "gpt", "value": "The answer is Supine (lying on the back)" } ], "clip_score": 25.0625, "loss": 2.347052812576294 }, { "id": "identity_280864", "image": "VizWiz/images/VizWiz_train_00005592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What breed of dog is this?" }, { "from": "gpt", "value": "The answer is german shepherd" } ], "clip_score": 22.75, "loss": 2.497415542602539 }, { "id": "identity_17333", "image": "VQA2.0/images/207978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horses are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.296875, "loss": 2.1853389739990234 }, { "id": "identity_41928", "image": "MapQA/images/map_7917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any state with trips less than 37778?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.953125, "loss": 2.781344175338745 }, { "id": "identity_329418", "image": "A-OKVQA/images/571746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people next to the train doing?\nChoices:\n(A) sledding\n(B) running\n(C) skiing\n(D) rolling" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.4375, "loss": 4.456827640533447 }, { "id": "identity_323099", "image": "ScienceQA/images/20726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Carson City\n(B) Trenton\n(C) Las Vegas\n(D) Salem" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.578125, "loss": 4.356021404266357 }, { "id": "identity_167863", "image": "IconQA/images/35180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a square was drawn inside the rectangle with each side equal to the width of the rectangle, what would be the area of the square?" }, { "from": "gpt", "value": "The answer is 16 cm²" } ], "clip_score": 32.4375, "loss": 1.7785935401916504 }, { "id": "identity_50307", "image": "PlotQA/images/115378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in male unemployment rate between the United States in 1982 and Canada in 1980?" }, { "from": "gpt", "value": "The answer is 5%" } ], "clip_score": 25.046875, "loss": 2.251281261444092 }, { "id": "identity_226721", "image": "GeoQA+/images/2882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,CD是⊙O的弦,∠ABD=60°,若∠CDB=2∠C,则∠C的度数是多少?" }, { "from": "gpt", "value": "The answer is 45°" } ], "clip_score": 32.15625, "loss": 1.7433123588562012 }, { "id": "identity_118829", "image": "A-OKVQA/images/88470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the perch the birds are sitting on?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.203125, "loss": 3.051284074783325 }, { "id": "identity_328343", "image": "PlotQA/images/19004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the gross enrolment ratio (female) in secondary schools always lower than the gross enrolment ratio (female) in primary schools over the years?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.0, "loss": 3.148552179336548 }, { "id": "identity_330549", "image": "ScienceQA/images/17463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can they trade to each get what they want?\nChoices:\n(A) They can trade tomatoes for carrots.\n(B) They can trade almonds for tomatoes.\n(C) They can trade tomatoes for broccoli.\n(D) They can trade broccoli for oranges." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.546875, "loss": 3.2142367362976074 }, { "id": "identity_83597", "image": "UniGeo/images/2553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the distance from village C to factory D is extended by 2 km, what would be the new distance from C to D?" }, { "from": "gpt", "value": "The answer is 7 km" } ], "clip_score": 26.90625, "loss": 2.135692834854126 }, { "id": "identity_273080", "image": "AI2D/images/2602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram corresponds to the High tide?\nChoices:\n(A) E\n(B) F\n(C) A\n(D) G" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.3125, "loss": 4.489573955535889 }, { "id": "identity_200703", "image": "A-OKVQA/images/348027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the uniform of the catcher?" }, { "from": "gpt", "value": "The answer is Black and Orange" } ], "clip_score": 18.21875, "loss": 3.5803565979003906 }, { "id": "identity_207698", "image": "A-OKVQA/images/476852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electronic device is visible on the desk?" }, { "from": "gpt", "value": "The answer is A computer" } ], "clip_score": 25.125, "loss": 3.46030330657959 }, { "id": "identity_92878", "image": "IconQA/images/94730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock is 30 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 5:30" } ], "clip_score": 30.734375, "loss": 1.5022226572036743 }, { "id": "identity_315484", "image": "A-OKVQA/images/239001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the poled items used for?\nChoices:\n(A) block sun\n(B) block rain\n(C) block wind\n(D) block snow" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.1171875, "loss": 4.775261878967285 }, { "id": "identity_67787", "image": "TQA/images/question_images/human_system_circulatory_3633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the aorta in the cardiovascular system?" }, { "from": "gpt", "value": "The answer is Carries oxygen-rich blood to the body" } ], "clip_score": 32.625, "loss": 2.2820990085601807 }, { "id": "identity_52613", "image": "DocVQA/images/lxjw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the institute requesting applications?" }, { "from": "gpt", "value": "The answer is NATIONAL HEART, LUNG, AND BLOOD INSTITUTE" } ], "clip_score": 19.625, "loss": 1.0475674867630005 }, { "id": "identity_97791", "image": "IconQA/images/77603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this shape have any parallel sides?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.65625, "loss": 3.2806460857391357 }, { "id": "identity_16423", "image": "PMC-VQA/images/PMC7515806_FIG2_260946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the expansile cord lesion located? \nChoices:\n(A) In the cervical spinal cord \n(B) In the lumbar spinal cord \n(C) In the thoracic spinal cord \n(D) In the sacral spinal cord " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.09375, "loss": 3.7456772327423096 }, { "id": "identity_319076", "image": "FigureQA/images/95499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Gray the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.0625, "loss": 3.8869502544403076 }, { "id": "identity_91963", "image": "TabMWP/images/9328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more scoops of juice mix does Kenny need to use for 8 liters of water compared to 5 liters?" }, { "from": "gpt", "value": "The answer is 6 more scoops" } ], "clip_score": 31.53125, "loss": 1.5879578590393066 }, { "id": "identity_38970", "image": "DVQA/images/bar_train_00174916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest 'animal' value?" }, { "from": "gpt", "value": "The answer is vice" } ], "clip_score": 26.96875, "loss": 4.310926914215088 }, { "id": "identity_52870", "image": "DVQA/images/bar_train_00051589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the least sales for the item 'liquor'?" }, { "from": "gpt", "value": "The answer is fist" } ], "clip_score": 28.421875, "loss": 3.2035024166107178 }, { "id": "identity_334650", "image": "TabMWP/images/21568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some people compared how many clubs they belong to. What is the range of the numbers?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.09375, "loss": 3.3522708415985107 }, { "id": "identity_226502", "image": "Geometry3K/images/1758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of the arc \\( \\overset{\\frown}{DG} \\) if \\( m \\angle DGI = 24^\\circ \\) and \\( r = 6 \\)" }, { "from": "gpt", "value": "The answer is \\( \\frac{4 \\pi}{5} \\)" } ], "clip_score": 29.578125, "loss": 1.9746767282485962 }, { "id": "identity_198076", "image": "PMC-VQA/images/PMC8099429_fig5s3_434013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the cells in the image healthy or showing signs of pathology?" }, { "from": "gpt", "value": "The answer is The cells appear healthy" } ], "clip_score": 25.734375, "loss": 3.6813037395477295 }, { "id": "identity_202940", "image": "A-OKVQA/images/522252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any duck standing on one leg?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.859375, "loss": 2.963968276977539 }, { "id": "identity_294513", "image": "MapQA/images/map_12085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the valuation of New York?" }, { "from": "gpt", "value": "The answer is 469324-471144" } ], "clip_score": 21.203125, "loss": 2.442418098449707 }, { "id": "identity_326873", "image": "DocVQA/images/ymfv0228_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is happening at 3:00?" }, { "from": "gpt", "value": "The answer is Coffee" } ], "clip_score": 17.609375, "loss": 2.5347964763641357 }, { "id": "identity_304930", "image": "TQA/images/abc_question_images/solar_system_16291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label corresponds to Mars?\nChoices:\n(A) E\n(B) A\n(C) H\n(D) F" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.765625, "loss": 3.661447048187256 }, { "id": "identity_90697", "image": "Geometry3K/images/1948.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle G in the parallelogram FGHD?" }, { "from": "gpt", "value": "The answer is 27.6 degrees" } ], "clip_score": 36.3125, "loss": 2.436145544052124 }, { "id": "identity_308064", "image": "TabMWP/images/6418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A reporter reviewed her notes to recall the number of interviews conducted over the past 6 days. What is the mode of these numbers?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 30.890625, "loss": 2.06740140914917 }, { "id": "identity_233133", "image": "TabMWP/images/7326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for transporting 3 bicycles and 2 cars using the Southport-Fort Fisher ferry and the Mukilteu-Clinton ferry?" }, { "from": "gpt", "value": "The answer is $45" } ], "clip_score": 36.6875, "loss": 2.150205135345459 }, { "id": "identity_52937", "image": "DVQA/images/bar_train_00115188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the preference for the object 'cycle' in the 'sun' category less than the 'source' category?" }, { "from": "gpt", "value": "The answer is Yes, 1 (sun) < 4 (source)" } ], "clip_score": 25.265625, "loss": 2.3407018184661865 }, { "id": "identity_271094", "image": "TabMWP/images/3541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Tiana have sufficient funds to purchase both a pair of shorts and a purple T-shirt with her $10.00?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.15625, "loss": 3.9548518657684326 }, { "id": "identity_267146", "image": "IconQA/images/4508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the quantity of rectangles an even or odd number?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.4375, "loss": 4.053003787994385 }, { "id": "identity_308565", "image": "ScienceQA/images/16717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Tallahassee\n(B) Richmond\n(C) Orlando\n(D) Columbia" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.96875, "loss": 4.203289985656738 }, { "id": "identity_223478", "image": "TextVQA/images/f12683eb77dbc10f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of equipment is visible in the background to the right?" }, { "from": "gpt", "value": "The answer is Baseball bats" } ], "clip_score": 25.203125, "loss": 2.7573513984680176 }, { "id": "identity_196219", "image": "PMC-VQA/images/PMC6293265_ccr31853-fig-0002_411497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical term for the black areas representing air in the lungs?" }, { "from": "gpt", "value": "The answer is Pulmonary parenchyma" } ], "clip_score": 31.28125, "loss": 2.2790071964263916 }, { "id": "identity_265995", "image": "ScienceQA/images/20103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Consider the magnetic force between the magnets in each pair. Which statement accurately describes the magnitude of the magnetic force between the pairs of magnets? \nOptions:\n(A) The magnetic force is greater in Pair 2.\n(B) The magnetic force is equal in both pairs.\n(C) The magnetic force is greater in Pair 1." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.328125, "loss": 3.8063323497772217 }, { "id": "identity_4761", "image": "FigureQA/images/37605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orchid the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.59375, "loss": 3.949347734451294 }, { "id": "identity_282714", "image": "PMC-VQA/images/PMC6790021_Fig2_50301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which column in the graph represents distance?\nChoices:\n(A) Left column\n(B) Right column\n(C) Top column\n(D) Bottom column" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 3.79296875, "loss": 4.062571048736572 }, { "id": "identity_62150", "image": "DocVQA/images/mhwm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the New York University Medical Center?" }, { "from": "gpt", "value": "The answer is 550 First Avenue, New York, NY 10016" } ], "clip_score": 19.109375, "loss": 0.8134667277336121 }, { "id": "identity_315676", "image": "Super-CLEVR/images/superCLEVR_new_001132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the blue thing to the right of the big thing behind the big rubber car?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 22.171875, "loss": 4.410182952880859 }, { "id": "identity_10352", "image": "DocVQA/images/ytyc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the salutation of this letter?" }, { "from": "gpt", "value": "The answer is Dear Dr. Shank" } ], "clip_score": 23.46875, "loss": 2.273062229156494 }, { "id": "identity_198976", "image": "PMC-VQA/images/PMC5608210_pone.0183979.g003_218259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any shapes that are not completely enclosed?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.265625, "loss": 3.4467475414276123 }, { "id": "identity_294582", "image": "PMC-VQA/images/PMC6630004_cns13118-fig-0001_8507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which MRI sequence was utilized to capture the images among the following options: \n(A) T2-weighted coronal sequence \n(B) T1-weighted sagittal sequence \n(C) T1-weighted coronal sequence \n(D) T1-weighted sagittal sequence" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.03125, "loss": 3.3001809120178223 }, { "id": "identity_228198", "image": "ScienceQA/images/8714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific aspects of the drone's flight performance could Valentina's test evaluate when adding the blade guards, and how might these aspects be measured?" }, { "from": "gpt", "value": "The answer is Stability, maneuverability, flight duration, power consumption, and speed" } ], "clip_score": 25.671875, "loss": 2.981463670730591 }, { "id": "identity_85271", "image": "UniGeo/images/1377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠BOD is 100°, what is the measure of ∠BCD?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 33.6875, "loss": 1.6057662963867188 }, { "id": "identity_81821", "image": "UniGeo/images/1304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AB is the diameter of circle O and chord CD is perpendicular to AB at point C, and the length of CD is 6 units, what is the length of radius OC?" }, { "from": "gpt", "value": "The answer is 3 units" } ], "clip_score": 36.40625, "loss": 1.8192437887191772 }, { "id": "identity_42870", "image": "DVQA/images/bar_train_00029864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people in total prefer 'favor' across all categories?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 26.53125, "loss": 2.039642572402954 }, { "id": "identity_96807", "image": "IconQA/images/51712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove all the crabs, how many objects will remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.21875, "loss": 2.2946066856384277 }, { "id": "identity_83932", "image": "UniGeo/images/1561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if DE is parallel to BC, AD = 2, AE = 3, and BD = 4, what is the ratio of the area of triangle ADE to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 4/9" } ], "clip_score": 35.625, "loss": 1.7696856260299683 }, { "id": "identity_182096", "image": "TextVQA/images/5ceb6c2732ef764f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players in total are visible on the field?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 20.59375, "loss": 2.2303287982940674 }, { "id": "identity_92484", "image": "TabMWP/images/6833.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Kirk buys an equal amount of granola and muesli totaling 10 pounds, how many pounds of each does he buy and what is the total cost?" }, { "from": "gpt", "value": "The answer is 5 pounds of each, $55 total" } ], "clip_score": 29.859375, "loss": 2.779540777206421 }, { "id": "identity_170918", "image": "IconQA/images/58101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green ovals are there in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.890625, "loss": 2.6535980701446533 }, { "id": "identity_3827", "image": "GEOS/images/051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Two equal circles are cut out of a rectangle of card of dimensions 16 by 8. The circles have the maximum diameter possible. What is the approximate area of the paper remaining after the circles have been cut out?\nChoices:\n(A) 104\n(B) 78\n(C) 54\n(D) 27\n(E) 13" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.953125, "loss": 2.2215614318847656 }, { "id": "identity_282698", "image": "A-OKVQA/images/306972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following individuals is in close proximity to the train? \n(A) cowboy \n(B) police officer \n(C) conductor \n(D) firefighter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.296875, "loss": 4.552494525909424 }, { "id": "identity_181230", "image": "IconQA/images/50526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two crabs are removed from the image, how many objects will remain?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 19.203125, "loss": 2.2468175888061523 }, { "id": "identity_291366", "image": "DVQA/images/bar_train_00114551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bar groups?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 15.5078125, "loss": 3.7365570068359375 }, { "id": "identity_18593", "image": "DVQA/images/bar_train_00090018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What store does the mediumpurple color represent?" }, { "from": "gpt", "value": "The answer is mirror" } ], "clip_score": 23.015625, "loss": 3.9366583824157715 }, { "id": "identity_244846", "image": "A-OKVQA/images/246183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which electronics company made the blue balloons, and how many people are holding them in the image?" }, { "from": "gpt", "value": "The answer is Samsung, 4 people" } ], "clip_score": 20.84375, "loss": 2.9711413383483887 }, { "id": "identity_76070", "image": "AI2D/images/3101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"KERN\" refer to in English?" }, { "from": "gpt", "value": "The answer is Nucleus" } ], "clip_score": 20.484375, "loss": 1.7677578926086426 }, { "id": "identity_139629", "image": "DVQA/images/bar_train_00123519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the bar with the highest number of triangles in the 'star' group?" }, { "from": "gpt", "value": "The answer is king" } ], "clip_score": 17.71875, "loss": 4.514671325683594 }, { "id": "identity_27058", "image": "DVQA/images/bar_train_00104118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the lowest accuracy?" }, { "from": "gpt", "value": "The answer is prison" } ], "clip_score": 28.171875, "loss": 3.7507920265197754 }, { "id": "identity_51509", "image": "DVQA/images/bar_train_00162206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the 'dancer' category across all groups?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 28.6875, "loss": 2.3739335536956787 }, { "id": "identity_264300", "image": "TextVQA/images/b90b6a1c98514380.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify one of the Broadway show advertisements in this photo?" }, { "from": "gpt", "value": "The answer is mamma mia" } ], "clip_score": 18.6875, "loss": 2.7269256114959717 }, { "id": "identity_53745", "image": "FigureQA/images/69164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines are represented in the legend?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 16.421875, "loss": 2.155484437942505 }, { "id": "identity_23780", "image": "PMC-VQA/images/PMC4868808_FIG1_30997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the location of the thrombus seen in the ultrasound image? \nChoices:\n(A) Left Popliteal vein \n(B) Left Superficial Femoral Vein \n(C) Left External Iliac Vein \n(D) Left Common Iliac Vein " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.03125, "loss": 3.780454158782959 }, { "id": "identity_331880", "image": "VQA2.0/images/262550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this individual riding a motorcycle?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 26.578125, "loss": 3.3186821937561035 }, { "id": "identity_32687", "image": "TabMWP/images/492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Caleb need to buy a book signed by a famous author, a skate signed by a famous figure skater, and a jersey signed by a famous hockey player?" }, { "from": "gpt", "value": "The answer is 226" } ], "clip_score": 34.34375, "loss": 2.6934654712677 }, { "id": "identity_182244", "image": "PMC-VQA/images/PMC9031993_jcm-11-02084-f003_262856.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any roots in the image that are broken or damaged?" }, { "from": "gpt", "value": "The answer is Yes, there are broken roots" } ], "clip_score": 27.0, "loss": 2.909874439239502 }, { "id": "identity_296334", "image": "DocVQA/images/pjgn0226_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall amount of money to be paid?" }, { "from": "gpt", "value": "The answer is $24,650.00" } ], "clip_score": 21.796875, "loss": 1.7479311227798462 }, { "id": "identity_289519", "image": "Geometry3K/images/373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nOptions:\n(A) 50\n(B) 52\n(C) 54\n(D) 56" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.6875, "loss": 2.4737865924835205 }, { "id": "identity_325541", "image": "TextVQA/images/bda84bf5acace54b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the Jose Cuervo Margarita?" }, { "from": "gpt", "value": "The answer is 10.25" } ], "clip_score": 26.890625, "loss": 1.9541399478912354 }, { "id": "identity_266807", "image": "TabMWP/images/34791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Baldwin has $12 and decides to purchase a mystery novel and a dog calendar, how much money will he have remaining?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 28.046875, "loss": 2.7185802459716797 }, { "id": "identity_292875", "image": "AI2D/images/353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what is the term for the stage between egg and adult?\nChoices:\n(A) none of the above\n(B) nymph\n(C) teenager\n(D) larva" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.46875, "loss": 4.126107215881348 }, { "id": "identity_127814", "image": "Super-CLEVR/images/superCLEVR_new_001887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have wheels?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 22.03125, "loss": 2.6560251712799072 }, { "id": "identity_262453", "image": "IconQA/images/32991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Estimate the length of the line using paper clips. The line is approximately (_) paper clips long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.015625, "loss": 2.418477773666382 }, { "id": "identity_80751", "image": "Geometry3K/images/147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given a = 14, b = 48, and c = 50 in triangle ABC, find the length of the altitude from vertex B to side AC" }, { "from": "gpt", "value": "The answer is 19.49" } ], "clip_score": 33.0625, "loss": 2.0474636554718018 }, { "id": "identity_239676", "image": "CLEVR-Math/images/CLEVR_train_009314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are left after removing all cubes and spheres?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.890625, "loss": 2.390897750854492 }, { "id": "identity_308590", "image": "FigureQA/images/81817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Peru?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.0625, "loss": 4.2345662117004395 }, { "id": "identity_75485", "image": "ScienceQA/images/18988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the typical color range of phyllite?" }, { "from": "gpt", "value": "The answer is Gray to black, sometimes green or silver" } ], "clip_score": 28.78125, "loss": 3.770092010498047 }, { "id": "identity_151189", "image": "ScienceQA/images/10331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the state marked with an orange dot?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.265625, "loss": 2.0506134033203125 }, { "id": "identity_134346", "image": "ChartQA/images/multi_col_60878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of male to female respondents who prefer 'Video'?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 28.671875, "loss": 1.5915532112121582 }, { "id": "identity_20580", "image": "AI2D/images/3434.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the section at the back called?\nChoices:\n(A) head\n(B) thorax\n(C) abdomen\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.21875, "loss": 4.1809306144714355 }, { "id": "identity_217928", "image": "VQA-AS/images/8709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are depicted in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.515625, "loss": 4.235805988311768 }, { "id": "identity_32689", "image": "VQA-AS/images/3028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the children going to a picnic?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.328125, "loss": 3.271088123321533 }, { "id": "identity_322369", "image": "DocVQA/images/nqvg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the designation of this person?" }, { "from": "gpt", "value": "The answer is Executive Director" } ], "clip_score": 20.53125, "loss": 4.34142541885376 }, { "id": "identity_17778", "image": "FigureQA/images/64049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.0, "loss": 3.761943817138672 }, { "id": "identity_184976", "image": "PMC-VQA/images/PMC6723156_jcm-08-01197-f006_33060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar measurement in the main image?" }, { "from": "gpt", "value": "The answer is 50 micrometers" } ], "clip_score": 10.203125, "loss": 1.6440321207046509 }, { "id": "identity_132642", "image": "VizWiz/images/VizWiz_train_00015166.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color of the book cover?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 23.15625, "loss": 2.811091184616089 }, { "id": "identity_235757", "image": "ScienceQA/images/3464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the formation process of breccia and explain how it differs from the formation of a typical mineral" }, { "from": "gpt", "value": "The answer is Breccia forms from rock fragments; minerals form through crystallization" } ], "clip_score": 29.5625, "loss": 2.664320230484009 }, { "id": "identity_321758", "image": "PMC-VQA/images/PMC6441667_Fig11_459429.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the mandible was removed?\nChoices:\n(A) Right mandible \n(B) Left mandible \n(C) Upper mandible \n(D) Lower mandible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.828125, "loss": 4.088431358337402 }, { "id": "identity_121246", "image": "TextVQA/images/4d2aa5c07005965b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the format of the item in the image?" }, { "from": "gpt", "value": "The answer is Vinyl record album cover" } ], "clip_score": 22.734375, "loss": 3.5794460773468018 }, { "id": "identity_38396", "image": "GeoQA+/images/1264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线CD与线段AB为直径的圆相切于点D,并交BA的延长线于点C,且AB=6,AD=3,P点在切线CD上移动.当∠APB的度数最大时,则∠ABP的度数为()\nChoices:\n(A) 90°\n(B) 60°\n(C) 45°\n(D) 30°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.90625, "loss": 2.7114994525909424 }, { "id": "identity_272138", "image": "TextVQA/images/91cfbf2260ce77cd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of named shades of green?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 17.546875, "loss": 2.2234866619110107 }, { "id": "identity_328943", "image": "PMC-VQA/images/PMC4330234_Fig1_358796.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was observed in the scan?\nChoices:\n(A) Intraabdominal abscesses \n(B) Postoperative ileus \n(C) Striking pericaecal inflammation \n(D) None of these" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.71875, "loss": 3.8563315868377686 }, { "id": "identity_88460", "image": "Geometry3K/images/696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side y in the triangle?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 36.3125, "loss": 2.4832816123962402 }, { "id": "identity_251112", "image": "TextVQA/images/d7ac482188ca280b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the three merchandise sections displayed from left to right starting from the plumbing section?" }, { "from": "gpt", "value": "The answer is Plumbing, Paint, Power Tools" } ], "clip_score": 23.390625, "loss": 1.9422847032546997 }, { "id": "identity_174004", "image": "IconQA/images/3493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the spinner is not blue?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 28.96875, "loss": 1.5864944458007812 }, { "id": "identity_99285", "image": "TabMWP/images/12851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of all the scores in the trivia game?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 24.40625, "loss": 2.0895514488220215 }, { "id": "identity_271811", "image": "FigureQA/images/16768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Indian Red not the minimum color option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.859375, "loss": 3.8735241889953613 }, { "id": "identity_210276", "image": "TextVQA/images/070a354ea162e868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What floral item is being held in the image?" }, { "from": "gpt", "value": "The answer is Carnations" } ], "clip_score": 12.7890625, "loss": 3.084608793258667 }, { "id": "identity_307175", "image": "ScienceQA/images/10502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these bodies of water does the prime meridian cross?\nChoices:\n(A) the Arctic Ocean\n(B) the Pacific Ocean\n(C) the Indian Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.171875, "loss": 4.320015907287598 }, { "id": "identity_271188", "image": "GeoQA+/images/5397.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, O is the center of the circle, AB is the diameter, C is a point on the semicircle, and D is a point on AC. If ∠BOC = 40°, what is the measure of ∠D?\nChoices:\n(A) 110°\n(B) 120°\n(C) 130°\n(D) 140°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.15625, "loss": 2.183598518371582 }, { "id": "identity_159113", "image": "GeoQA+/images/3878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABCD内接于⊙O,若∠BCD=80°,则∠AOD的度数是()?" }, { "from": "gpt", "value": "The answer is 100°" } ], "clip_score": 32.28125, "loss": 1.6285250186920166 }, { "id": "identity_135303", "image": "PlotQA/images/37723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of subscribers for Italy in 2007?" }, { "from": "gpt", "value": "The answer is Approximately 8000000" } ], "clip_score": 30.796875, "loss": 2.074139356613159 }, { "id": "identity_282700", "image": "PMC-VQA/images/PMC6476123_fig2_469075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the findings of the initial CT scan at the time of admission? \nChoices:\n(A) Bilateral fracture of the medial mass of the sacrum.\n(B) Displaced bilateral fracture of the lateral mass of the sacrum.\n(C) Nondisplaced unilateral fracture of the medial mass of the sacrum.\n(D) Displaced unilateral fracture of the lateral mass of the sacrum." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.1875, "loss": 3.975355625152588 }, { "id": "identity_202130", "image": "PMC-VQA/images/PMC8035432_brb32051-fig-0001_413225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of midline shift in this brain image?" }, { "from": "gpt", "value": "The answer is No, there is no midline shift" } ], "clip_score": 29.90625, "loss": 2.0420315265655518 }, { "id": "identity_77972", "image": "TQA/images/question_images/human_system_reproductory_7035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Corpus cavernosum\"?" }, { "from": "gpt", "value": "The answer is It helps maintain an erection" } ], "clip_score": 26.65625, "loss": 2.5981602668762207 }, { "id": "identity_161206", "image": "UniGeo/images/418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and point A is the midpoint of arc DC, what is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 34.8125, "loss": 1.5476882457733154 }, { "id": "identity_274599", "image": "DVQA/images/bar_train_00174045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the leftmost bar in each group?" }, { "from": "gpt", "value": "The answer is soap" } ], "clip_score": 23.828125, "loss": 4.521054744720459 }, { "id": "identity_154280", "image": "AI2D/images/2075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the secondary consumer(s) in the ecosystem shown in the image" }, { "from": "gpt", "value": "The answer is The snake and larger animals" } ], "clip_score": 30.171875, "loss": 4.246946811676025 }, { "id": "identity_39441", "image": "FigureQA/images/81854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Steel Blue have a consistent trend throughout the graph?" }, { "from": "gpt", "value": "The answer is No, it fluctuates" } ], "clip_score": 27.265625, "loss": 2.799536943435669 }, { "id": "identity_274663", "image": "FigureQA/images/1925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Royal Blue not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.78125, "loss": 3.737499237060547 }, { "id": "identity_311865", "image": "DVQA/images/bar_train_00120112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the smallest individual bar in this chart?" }, { "from": "gpt", "value": "The answer is -6" } ], "clip_score": 28.53125, "loss": 2.954434633255005 }, { "id": "identity_301536", "image": "ChartQA/images/9922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of respondents believe immigrants in the U.S. are improving the economy?" }, { "from": "gpt", "value": "The answer is 28" } ], "clip_score": 27.46875, "loss": 2.6515514850616455 }, { "id": "identity_56607", "image": "DocVQA/images/lhjh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who is the President?" }, { "from": "gpt", "value": "The answer is Robert R. Owen" } ], "clip_score": 25.203125, "loss": 1.6234188079833984 }, { "id": "identity_162541", "image": "GeoQA+/images/1369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the same triangle ABC, with points D and E on sides AB and AC respectively, and DE parallel to BC, if the length of side AB is 14, what is the length of side AD?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.59375, "loss": 2.4143869876861572 }, { "id": "identity_150479", "image": "ScienceQA/images/8278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly north of the highlighted state?" }, { "from": "gpt", "value": "The answer is Kentucky" } ], "clip_score": 32.4375, "loss": 3.121985673904419 }, { "id": "identity_86669", "image": "Geometry3K/images/1117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle P in the given quadrilateral MPRQ?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 33.25, "loss": 1.793254017829895 }, { "id": "identity_135282", "image": "DVQA/images/bar_train_00145144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of 'amount' units sold across all stores?" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 25.546875, "loss": 1.8211300373077393 }, { "id": "identity_324974", "image": "VQA-AS/images/11181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the person's shirt?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 16.828125, "loss": 3.8850204944610596 }, { "id": "identity_225956", "image": "AI2D/images/469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of stages in the cycle starting from the Egg and ending at the Adult?" }, { "from": "gpt", "value": "The answer is Egg, Larva, Pupa, Adult" } ], "clip_score": 30.5625, "loss": 1.2255897521972656 }, { "id": "identity_209195", "image": "TextVQA/images/0259621e0cf41ab9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature is listed at the bottom left on the device in the foreground?" }, { "from": "gpt", "value": "The answer is 3D" } ], "clip_score": 13.859375, "loss": 2.9399356842041016 }, { "id": "identity_16929", "image": "TextVQA/images/cad24ea925944b15.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what letter is directly under 10?" }, { "from": "gpt", "value": "The answer is t" } ], "clip_score": 12.2421875, "loss": 3.9618277549743652 }, { "id": "identity_180499", "image": "TabMWP/images/8889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more teacher with 5 years of teaching experience is added to the list, what will be the new mean of the number of years?" }, { "from": "gpt", "value": "The answer is approximately 4.11" } ], "clip_score": 32.65625, "loss": 2.7284388542175293 }, { "id": "identity_114392", "image": "PMC-VQA/images/PMC7393149_Fig1_222964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the structure at the top of the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.625, "loss": 3.713319778442383 }, { "id": "identity_248675", "image": "PMC-VQA/images/PMC8586207_f1_110436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key anatomical structures visible in a sagittal echocardiography view of the heart, and how do they help in diagnosing aortic arch abnormalities?" }, { "from": "gpt", "value": "The answer is Left ventricle, aortic valve, ascending aorta, aortic arch, descending aorta" } ], "clip_score": 26.3125, "loss": 1.3256803750991821 }, { "id": "identity_127472", "image": "VQA-AS/images/5291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of path is the vehicle riding on?" }, { "from": "gpt", "value": "The answer is A paved path" } ], "clip_score": 28.1875, "loss": 3.1497585773468018 }, { "id": "identity_95507", "image": "IconQA/images/1574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of marbles in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 30.078125, "loss": 1.9518018960952759 }, { "id": "identity_94681", "image": "IconQA/images/71791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past 2 o'clock is it?" }, { "from": "gpt", "value": "The answer is 0 minutes" } ], "clip_score": 29.5, "loss": 2.112837076187134 }, { "id": "identity_280000", "image": "Super-CLEVR/images/superCLEVR_new_001382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape does the matte object on the left side of the gray thing in front of the big brown dirtbike have?" }, { "from": "gpt", "value": "The answer is dirtbike" } ], "clip_score": 19.53125, "loss": 3.1420016288757324 }, { "id": "identity_696", "image": "FigureQA/images/87074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson greater than Periwinkle?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.8828125, "loss": 4.017238616943359 }, { "id": "identity_327160", "image": "IconQA/images/94026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are there?\nChoices:\n(A) 1\n(B) 3\n(C) 10\n(D) 4\n(E) 8" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 27.921875, "loss": 3.160165309906006 }, { "id": "identity_288942", "image": "A-OKVQA/images/304295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Into what shape are two of the grass wreaths fashioned? \n(A) octagon\n(B) rectangle\n(C) tree\n(D) cross" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.296875, "loss": 3.5579543113708496 }, { "id": "identity_245024", "image": "Super-CLEVR/images/superCLEVR_new_000919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more purple things than the sum of large red metal utility bikes and yellow things?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.796875, "loss": 4.896550178527832 }, { "id": "identity_166403", "image": "TabMWP/images/3853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the price of cinnamon increases by 50%, how much would 1 kilogram cost?" }, { "from": "gpt", "value": "The answer is $4.50" } ], "clip_score": 29.8125, "loss": 1.6317757368087769 }, { "id": "identity_304723", "image": "FigureQA/images/20836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.65625, "loss": 4.229645729064941 }, { "id": "identity_271609", "image": "VQA-RAD/images/synpic38630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of lesion is indicated by the presence of internal gas and an enhancing rim?" }, { "from": "gpt", "value": "The answer is abcess" } ], "clip_score": 26.9375, "loss": 3.379352331161499 }, { "id": "identity_125947", "image": "A-OKVQA/images/264828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle on the left side of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 13.296875, "loss": 3.1024105548858643 }, { "id": "identity_304058", "image": "FigureQA/images/94317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this intersect Orange Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.953125, "loss": 3.672214984893799 }, { "id": "identity_65544", "image": "TQA/images/abc_question_images/human_system_circulatory_13637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"T\" indicate in the context of the circulatory system?" }, { "from": "gpt", "value": "The answer is Blood vessels of the legs" } ], "clip_score": 35.4375, "loss": 3.8298556804656982 }, { "id": "identity_190549", "image": "PMC-VQA/images/PMC6359439_ijms-20-00303-f001_431116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain scan?" }, { "from": "gpt", "value": "The answer is Coronal (frontal)" } ], "clip_score": 32.8125, "loss": 2.9184560775756836 }, { "id": "identity_192728", "image": "PMC-VQA/images/PMC6376781_Fig2_436488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Transverse or axial orientation, posterior labeled" } ], "clip_score": 28.65625, "loss": 4.889647006988525 }, { "id": "identity_315367", "image": "VizWiz/images/VizWiz_train_00016425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is visible here?" }, { "from": "gpt", "value": "The answer is icons" } ], "clip_score": 22.5625, "loss": 4.753824710845947 }, { "id": "identity_36888", "image": "VQA2.0/images/165169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cars are parked?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 11.7890625, "loss": 2.276136636734009 }, { "id": "identity_274460", "image": "PMC-VQA/images/PMC8633688_fig4_127678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the scale bar? Is it (A) 1 meter, (B) 1 inch, (C) 1 centimeter, or (D) 1 millimeter?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.0625, "loss": 3.2643370628356934 }, { "id": "identity_115220", "image": "TextVQA/images/c76f0ac2816f7c69.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the border of the 'RIVER' sign?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 28.140625, "loss": 3.164788007736206 }, { "id": "identity_251133", "image": "PMC-VQA/images/PMC4330237_Fig15_358901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific anatomical structures can be identified in the PTG, and what is their significance in diagnosing dental conditions?" }, { "from": "gpt", "value": "The answer is Root sockets of the teeth, alveolar bone, teeth alignment, jawbone health" } ], "clip_score": 30.625, "loss": 3.5102977752685547 }, { "id": "identity_222902", "image": "TextVQA/images/1b7dfdac663874aa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What animal is depicted on the leftmost bottle's label?" }, { "from": "gpt", "value": "The answer is Elephant" } ], "clip_score": 29.390625, "loss": 2.4819533824920654 }, { "id": "identity_316118", "image": "UniGeo/images/1743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在圆O中,若点C是弧AB的中点,∠A=50°,则∠BOC等于多少?\n选项:\n(A) 50°\n(B) 45°\n(C) 40°\n(D) 35°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.984375, "loss": 2.7651638984680176 }, { "id": "identity_241883", "image": "Geometry3K/images/474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine \\( m \\widehat{YCA} \\)" }, { "from": "gpt", "value": "The answer is 117" } ], "clip_score": 28.359375, "loss": 3.4695627689361572 }, { "id": "identity_34974", "image": "TabMWP/images/11455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Jordan has $194. Does he have enough to buy a desk and a hammock?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.359375, "loss": 3.4581406116485596 }, { "id": "identity_154411", "image": "AI2D/images/4343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which citrus fruit in the image is a hybrid of a tangerine and a grapefruit?" }, { "from": "gpt", "value": "The answer is Tangelo" } ], "clip_score": 29.265625, "loss": 2.365657329559326 }, { "id": "identity_15815", "image": "PMC-VQA/images/PMC5084403_Fig10_88889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by red fluorescence? \nChoices:\n(A) Endoplasmic reticulum \n(B) Mitochondria \n(C) Nucleus \n(D) Golgi apparatus " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.703125, "loss": 4.210017204284668 }, { "id": "identity_314912", "image": "VQA2.0/images/544921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which vehicle would you want to see if something was just stolen from you?" }, { "from": "gpt", "value": "The answer is car on bottom" } ], "clip_score": 14.609375, "loss": 4.3587541580200195 }, { "id": "identity_45687", "image": "FigureQA/images/30485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Orange segment larger than the Periwinkle segment?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.1875, "loss": 3.457007646560669 }, { "id": "identity_282449", "image": "ScienceQA/images/4305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What common property do these two objects share among the following choices: (A) breakable or (B) soft?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.75, "loss": 4.02555513381958 }, { "id": "identity_256436", "image": "DocVQA/images/jhjh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the editor of the publication located in Morrill, Neb.?" }, { "from": "gpt", "value": "The answer is E. R. Montgomery" } ], "clip_score": 21.453125, "loss": 2.6804018020629883 }, { "id": "identity_328519", "image": "PMC-VQA/images/PMC7237708_f1_174304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image is associated with the treatment involving rabeprazole and calcium?\nChoices:\n(A) Image A \n(B) Image B \n(C) Image C \n(D) Image D" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.734375, "loss": 3.5713775157928467 }, { "id": "identity_319085", "image": "MapQA/images/map_258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value in this region?" }, { "from": "gpt", "value": "The answer is 33129-33934" } ], "clip_score": 23.859375, "loss": 2.187635898590088 }, { "id": "identity_29028", "image": "A-OKVQA/images/484761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of activities are happening here?\nChoices:\n(A) winter\n(B) electronic\n(C) aquatic\n(D) aviation" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.328125, "loss": 4.026479721069336 }, { "id": "identity_317647", "image": "TabMWP/images/32011.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy a silver bow and a shiny red button?" }, { "from": "gpt", "value": "The answer is 0.04" } ], "clip_score": 33.65625, "loss": 1.9547924995422363 }, { "id": "identity_241403", "image": "AI2D/images/3411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the membrane that develops around an oocyte in insects and describe its function in the development process" }, { "from": "gpt", "value": "The answer is The vitelline membrane provides protection and prevents polyspermy" } ], "clip_score": 34.40625, "loss": 3.062781810760498 }, { "id": "identity_202592", "image": "A-OKVQA/images/308892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the bike seat cover?" }, { "from": "gpt", "value": "The answer is Leopard print" } ], "clip_score": 26.359375, "loss": 2.710599660873413 }, { "id": "identity_245103", "image": "TextVQA/images/42b87a51a6913d93.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the status of the road construction and how does it affect the businesses in the area?" }, { "from": "gpt", "value": "The answer is businesses are open despite ongoing road construction" } ], "clip_score": 29.5, "loss": 2.9597134590148926 }, { "id": "identity_55607", "image": "DocVQA/images/hpbv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the institution associated with the sender?" }, { "from": "gpt", "value": "The answer is The Johns Hopkins University School of Medicine" } ], "clip_score": 20.25, "loss": 2.134547710418701 }, { "id": "identity_67790", "image": "TQA/images/question_images/human_system_circulatory_3633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the vessel labeled \"Superior vena cava\"?" }, { "from": "gpt", "value": "The answer is Carries deoxygenated blood to the heart" } ], "clip_score": 30.046875, "loss": 2.197054386138916 }, { "id": "identity_306300", "image": "TabMWP/images/6398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy 5 of these and 6 of those?" }, { "from": "gpt", "value": "The answer is 8,970" } ], "clip_score": 27.1875, "loss": 2.4081943035125732 }, { "id": "identity_134339", "image": "FigureQA/images/42452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the Sky Blue bar?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.5625, "loss": 2.6574809551239014 }, { "id": "identity_37190", "image": "VQA2.0/images/381163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you see any vehicles coming around the corner?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.4375, "loss": 3.3477988243103027 }, { "id": "identity_149830", "image": "DocVQA/images/kkhf0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date at the bottom of the document?" }, { "from": "gpt", "value": "The answer is 9/16/66" } ], "clip_score": 16.484375, "loss": 2.5517377853393555 }, { "id": "identity_214504", "image": "A-OKVQA/images/459185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the flag on the left side of the jump?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.703125, "loss": 3.265821933746338 }, { "id": "identity_212456", "image": "PMC-VQA/images/PMC4150633_f0005_316993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in anatomical terms?" }, { "from": "gpt", "value": "The answer is Axial or transverse plane" } ], "clip_score": 27.546875, "loss": 3.1452507972717285 }, { "id": "identity_108388", "image": "Super-CLEVR/images/superCLEVR_new_001932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are either green or have wheels in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.15625, "loss": 2.2074391841888428 }, { "id": "identity_315100", "image": "TabMWP/images/29725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Adam kept track of how many comic strips he read over the past 6 days. What is the average of these numbers?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.1875, "loss": 2.4744091033935547 }, { "id": "identity_179214", "image": "TabMWP/images/15256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each street loses 2 parking spots, how many spots will 3 streets have?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 33.09375, "loss": 2.517146348953247 }, { "id": "identity_114674", "image": "A-OKVQA/images/474253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct splashes can be seen around the horse?" }, { "from": "gpt", "value": "The answer is An integer number based on visible splashes" } ], "clip_score": 27.625, "loss": 3.8878252506256104 }, { "id": "identity_108543", "image": "A-OKVQA/images/426026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the closest kite to the camera?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 26.078125, "loss": 2.9217793941497803 }, { "id": "identity_24369", "image": "DocVQA/images/rtvw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where was the meeting held?" }, { "from": "gpt", "value": "The answer is CONFERENCE ROOM 555-6A" } ], "clip_score": 18.625, "loss": 2.570624589920044 }, { "id": "identity_157049", "image": "ScienceQA/images/13882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total volume of liquid in the glass flask?" }, { "from": "gpt", "value": "The answer is 500 ml" } ], "clip_score": 15.78125, "loss": 1.210433840751648 }, { "id": "identity_135158", "image": "MapQA/images/map_11667.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color code for the state of Texas?" }, { "from": "gpt", "value": "The answer is 45.2%-57.9%" } ], "clip_score": 28.421875, "loss": 2.399974822998047 }, { "id": "identity_247784", "image": "PMC-VQA/images/PMC8486363_FIG1_74172.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate volume of the hemorrhage in cubic centimeters (cc) if the hemorrhage is assumed to be an ellipsoid with the following dimensions: 4 cm (length), 3 cm (width), and 2 cm (height)?" }, { "from": "gpt", "value": "The answer is 12.56 cubic centimeters" } ], "clip_score": 26.390625, "loss": 1.8207899332046509 }, { "id": "identity_108496", "image": "Super-CLEVR/images/superCLEVR_new_001511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels visible?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.515625, "loss": 2.587127923965454 }, { "id": "identity_188159", "image": "PMC-VQA/images/PMC7713150_Fig3_314221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar measurement at the bottom right of the image?" }, { "from": "gpt", "value": "The answer is The scale bar measurement is not visible in the image" } ], "clip_score": 21.09375, "loss": 2.2542855739593506 }, { "id": "identity_256421", "image": "ChartQA/images/multi_col_60547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total difference between Urban and Rural regions for all categories combined?" }, { "from": "gpt", "value": "The answer is 66%" } ], "clip_score": 32.59375, "loss": 2.0932633876800537 }, { "id": "identity_209333", "image": "PMC-VQA/images/PMC4567369_f01_422971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the brain is shown at the top of the image?" }, { "from": "gpt", "value": "The answer is Cerebral hemispheres" } ], "clip_score": 30.265625, "loss": 1.943162441253662 }, { "id": "identity_72298", "image": "TQA/images/question_images/solar_system_6291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which celestial body in the image has the most prominent ring system?" }, { "from": "gpt", "value": "The answer is Saturn" } ], "clip_score": 26.0625, "loss": 2.152722120285034 }, { "id": "identity_121660", "image": "Super-CLEVR/images/superCLEVR_new_000239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object that is furthest from the camera?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 21.25, "loss": 3.8330204486846924 }, { "id": "identity_317062", "image": "Geometry3K/images/861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of \\angle 4.\nChoices:\n(A) 45\n(B) 60\n(C) 75\n(D) 120" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.390625, "loss": 3.222745418548584 }, { "id": "identity_204553", "image": "PMC-VQA/images/PMC6215355_Fig2_390952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the MRI slice shown in the image?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 33.34375, "loss": 3.4392929077148438 }, { "id": "identity_183320", "image": "A-OKVQA/images/530823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person on the left engaged in, as suggested by the items displayed and their position?" }, { "from": "gpt", "value": "The answer is Selling textiles or clothing" } ], "clip_score": 17.1875, "loss": 2.5978803634643555 }, { "id": "identity_151879", "image": "ScienceQA/images/718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of yellow particles to solvent volume in Solution B?" }, { "from": "gpt", "value": "The answer is 0.25 particles per mL" } ], "clip_score": 38.3125, "loss": 2.2820801734924316 }, { "id": "identity_329210", "image": "PMC-VQA/images/PMC7264643_f7_182123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the green regions in the image indicate?\nChoices:\n(A) Visual areas\n(B) Auditory areas\n(C) Multimodal areas\n(D) Motor areas" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.703125, "loss": 4.422850131988525 }, { "id": "identity_304579", "image": "AI2D/images/2147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which stage do larvae develop into polyps?\n\nChoices:\n(A) 1st\n(B) 3rd\n(C) 4th\n(D) 2nd" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.984375, "loss": 4.157134532928467 }, { "id": "identity_182803", "image": "VQA2.0/images/573184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the aircraft in the foreground in motion?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.484375, "loss": 3.336465835571289 }, { "id": "identity_296687", "image": "VizWiz/images/VizWiz_train_00023683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is tablet" } ], "clip_score": 18.578125, "loss": 2.9857847690582275 }, { "id": "identity_16897", "image": "ScienceQA/images/10928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of rock is tillite?\nChoices:\n(A) igneous\n(B) sedimentary\n(C) metamorphic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.08669900894165 }, { "id": "identity_307084", "image": "DVQA/images/bar_train_00160851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals prefer the object cross?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.015625, "loss": 2.284250259399414 }, { "id": "identity_113032", "image": "Super-CLEVR/images/superCLEVR_new_001204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicles are present in the image?" }, { "from": "gpt", "value": "The answer is Bus, car, bicycles, airplane" } ], "clip_score": 29.03125, "loss": 2.2567291259765625 }, { "id": "identity_138784", "image": "MapQA/images/map_1306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest range of the number of adults?" }, { "from": "gpt", "value": "The answer is Wyoming" } ], "clip_score": 31.890625, "loss": 2.3081462383270264 }, { "id": "identity_103387", "image": "VQA2.0/images/356015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.296875, "loss": 2.4601361751556396 }, { "id": "identity_191909", "image": "A-OKVQA/images/160325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the location mentioned on the side of the trolley?" }, { "from": "gpt", "value": "The answer is Main Street Plaza" } ], "clip_score": 28.625, "loss": 3.411910057067871 }, { "id": "identity_251025", "image": "VizWiz/images/VizWiz_train_00017189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the physical characteristics and surroundings of the dog in the image, including its breed, color, and any objects or features in its environment" }, { "from": "gpt", "value": "The answer is Lab, light brown, green toy, brick wall, black pipe, gravel" } ], "clip_score": 30.625, "loss": 3.0389246940612793 }, { "id": "identity_146108", "image": "ChartQA/images/multi_col_60015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of the population share of the age group 65 years and older from 2009 to 2019?" }, { "from": "gpt", "value": "The answer is Increasing" } ], "clip_score": 30.96875, "loss": 3.1406655311584473 }, { "id": "identity_53071", "image": "FigureQA/images/53222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the legend in alphabetical order?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.4375, "loss": 3.4320120811462402 }, { "id": "identity_163975", "image": "GeoQA+/images/5517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle, if angle B is 30 degrees and angle C is 60 degrees, and the length of AC is 3, what is the measure of angle A in degrees?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 34.8125, "loss": 2.276155948638916 }, { "id": "identity_4452", "image": "ChartQA/images/16902888001203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the value of smallest bar?" }, { "from": "gpt", "value": "The answer is 62.22" } ], "clip_score": 13.1484375, "loss": 2.8842036724090576 }, { "id": "identity_15757", "image": "Super-CLEVR/images/superCLEVR_new_001361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a blue bus that is the same size as the red bus; what material is it?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 19.8125, "loss": 3.419340133666992 }, { "id": "identity_150115", "image": "DocVQA/images/zrlp0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of days worked in October 1975?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 23.59375, "loss": 2.2556684017181396 }, { "id": "identity_152872", "image": "ScienceQA/images/7966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the east?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 33.375, "loss": 2.8293027877807617 }, { "id": "identity_103489", "image": "TextVQA/images/20761845a2df7fe2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the park where the game is being played?" }, { "from": "gpt", "value": "The answer is Nationals Park" } ], "clip_score": 34.25, "loss": 1.8741559982299805 }, { "id": "identity_244316", "image": "A-OKVQA/images/37678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the number of years of steam power is divided by the number of years between 1863 and 2013, what is the resulting quotient?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 19.78125, "loss": 2.5785715579986572 }, { "id": "identity_160407", "image": "GeoQA+/images/3015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AO is parallel to DC and angle B is 55 degrees, what is the measure of angle COD?" }, { "from": "gpt", "value": "The answer is 125 degrees" } ], "clip_score": 30.515625, "loss": 1.7165470123291016 }, { "id": "identity_227486", "image": "ChartQA/images/16902888001203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and the lowest attendance rates?" }, { "from": "gpt", "value": "The answer is 35.69%" } ], "clip_score": 27.046875, "loss": 2.2744815349578857 }, { "id": "identity_20300", "image": "A-OKVQA/images/292730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a candle in the woman's dessert?\nChoices:\n(A) to trick\n(B) to celebrate\n(C) as joke\n(D) for light" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.015625, "loss": 4.328037261962891 }, { "id": "identity_276747", "image": "TQA/images/question_images/human_system_eye_6129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the structure that represents the cornea from the following options:\n(A) 4\n(B) 7\n(C) 2\n(D) 3" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.78125, "loss": 3.599088668823242 }, { "id": "identity_295261", "image": "Geometry3K/images/597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x, assuming that segments that appear to be tangent are indeed tangent.\nChoices:\n(A) 6\n(B) 8\n(C) 9.6\n(D) 14.6" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.46875, "loss": 3.0131523609161377 }, { "id": "identity_4621", "image": "TQA/images/question_images/human_system_circulatory_3622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the lungs to the heart?\nChoices:\n(A) aorta\n(B) Systemic veins\n(C) right atrium\n(D) right ventricle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.65625, "loss": 4.160518646240234 }, { "id": "identity_218476", "image": "A-OKVQA/images/507927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the biker in the foreground wearing?" }, { "from": "gpt", "value": "The answer is Boots" } ], "clip_score": 27.6875, "loss": 2.1329333782196045 }, { "id": "identity_155154", "image": "ScienceQA/images/3312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which direction would you travel from the highlighted country to reach the state directly south of it?" }, { "from": "gpt", "value": "The answer is South" } ], "clip_score": 27.3125, "loss": 2.898707151412964 }, { "id": "identity_244729", "image": "DocVQA/images/llhv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the publication and its publication number?" }, { "from": "gpt", "value": "The answer is The Safety of Artificial Sweeteners for Use in Foods, 386" } ], "clip_score": 40.875, "loss": 1.1389025449752808 }, { "id": "identity_31906", "image": "PMC-VQA/images/PMC5559574_Fig1_203935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the finding in the acetabulum seen in the image?\nChoices:\n(A) Posterior inferior \n(B) Anterosuperior \n(C) Posterosuperior \n(D) Anteroinferior " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.265625, "loss": 4.677380084991455 }, { "id": "identity_105666", "image": "VQA-AS/images/15952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What electronic device is the person on the right using?" }, { "from": "gpt", "value": "The answer is A computer" } ], "clip_score": 17.59375, "loss": 3.6186411380767822 }, { "id": "identity_258441", "image": "ScienceQA/images/11305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of Vermont and then name the capital of the state directly to the west of Vermont" }, { "from": "gpt", "value": "The answer is Albany" } ], "clip_score": 24.515625, "loss": 3.549025774002075 }, { "id": "identity_41709", "image": "ChartQA/images/two_col_40202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of sales is represented by Europe?" }, { "from": "gpt", "value": "The answer is 27%" } ], "clip_score": 29.71875, "loss": 1.9813272953033447 }, { "id": "identity_122460", "image": "VQA-RAD/images/synpic18319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the heart size within normal limits on this chest X-ray?" }, { "from": "gpt", "value": "The answer is No, the heart size appears enlarged" } ], "clip_score": 31.984375, "loss": 2.025435447692871 }, { "id": "identity_86197", "image": "GeoQA+/images/4730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC in the given circle?" }, { "from": "gpt", "value": "The answer is 155 degrees" } ], "clip_score": 34.3125, "loss": 2.550795316696167 }, { "id": "identity_66136", "image": "ScienceQA/images/19132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what can be inferred about the capybara's diet?" }, { "from": "gpt", "value": "The answer is Herbivorous, feeds on grass" } ], "clip_score": 32.03125, "loss": 2.760929584503174 }, { "id": "identity_219166", "image": "VQA2.0/images/18723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Partly cloudy" } ], "clip_score": 11.8515625, "loss": 2.21042537689209 }, { "id": "identity_63754", "image": "ScienceQA/images/11286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a rat with genotype 'Ee' is crossed with a rat with genotype 'ee', what is the probability that their offspring will have standard ears?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 28.359375, "loss": 1.676396369934082 }, { "id": "identity_150276", "image": "DocVQA/images/xqnf0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What award did Dr. Charles Yanofsky receive according to the text?" }, { "from": "gpt", "value": "The answer is Albert Lasker Award" } ], "clip_score": 14.9296875, "loss": 2.47932505607605 }, { "id": "identity_97025", "image": "IconQA/images/1353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.0625, "loss": 2.0584542751312256 }, { "id": "identity_209159", "image": "TextVQA/images/804c5ccaa4631670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many surfboards are flying in the air in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 17.875, "loss": 2.139535903930664 }, { "id": "identity_82809", "image": "Geometry3K/images/395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the rectangle formed by the base and height of the right triangle?" }, { "from": "gpt", "value": "The answer is 377 cm²" } ], "clip_score": 33.5625, "loss": 2.279148817062378 }, { "id": "identity_46235", "image": "FigureQA/images/80131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Cornflower line ever reach a value of 60 on the y-axis label?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.84375, "loss": 3.197751522064209 }, { "id": "identity_58432", "image": "DocVQA/images/szbj0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the complete phone number listed on the business card?" }, { "from": "gpt", "value": "The answer is 916-752-3163" } ], "clip_score": 22.328125, "loss": 2.216792106628418 }, { "id": "identity_329395", "image": "VQA2.0/images/12145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event are they celebrating?" }, { "from": "gpt", "value": "The answer is sports event" } ], "clip_score": 16.78125, "loss": 3.0256893634796143 }, { "id": "identity_80667", "image": "GeoQA+/images/338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that angle B is 40 degrees larger than angle A in parallelogram ABCD, what is the measure of angle A?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 24.28125, "loss": 2.019580841064453 }, { "id": "identity_203752", "image": "A-OKVQA/images/412764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is taking place in the center of the field?" }, { "from": "gpt", "value": "The answer is Dog frisbee competition" } ], "clip_score": 30.296875, "loss": 2.1311535835266113 }, { "id": "identity_61912", "image": "DocVQA/images/rtxm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the dates mentioned for the seminar?" }, { "from": "gpt", "value": "The answer is 23, 24 and 25 November 1972" } ], "clip_score": 24.625, "loss": 1.56942880153656 }, { "id": "identity_173808", "image": "IconQA/images/76675.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which picture is fourth?" }, { "from": "gpt", "value": "The answer is A green airplane" } ], "clip_score": 22.671875, "loss": 3.9604437351226807 }, { "id": "identity_190694", "image": "VQA2.0/images/130138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Sunny and dry weather" } ], "clip_score": 16.859375, "loss": 3.0306615829467773 }, { "id": "identity_80230", "image": "GeoQA+/images/4592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with diameter AB, if angle CAB is 25 degrees, what is the measure of angle COD?" }, { "from": "gpt", "value": "The answer is 50 degrees" } ], "clip_score": 35.5625, "loss": 2.046504497528076 }, { "id": "identity_45102", "image": "DVQA/images/bar_train_00122934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value represented by a solid bar?" }, { "from": "gpt", "value": "The answer is voting" } ], "clip_score": 24.046875, "loss": 4.3146796226501465 }, { "id": "identity_266228", "image": "TextVQA/images/aec1f662f074dd76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the numbers displayed on the screen?" }, { "from": "gpt", "value": "The answer is 678'" } ], "clip_score": 20.90625, "loss": 3.3975718021392822 }, { "id": "identity_146491", "image": "DVQA/images/bar_train_00104756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any algorithm that has the same accuracy value for both 'noise' and 'money' datasets?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.5625, "loss": 3.0560598373413086 }, { "id": "identity_260638", "image": "TabMWP/images/27053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of athletes from countries that have at least 61 athletes?" }, { "from": "gpt", "value": "The answer is 367" } ], "clip_score": 36.34375, "loss": 2.438854932785034 }, { "id": "identity_173440", "image": "IconQA/images/62176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each yard is equivalent to 3 feet, what is the perimeter of the rectangle in feet?" }, { "from": "gpt", "value": "The answer is 54 feet" } ], "clip_score": 32.15625, "loss": 2.2131495475769043 }, { "id": "identity_180865", "image": "TabMWP/images/8457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of baskets Harper made during the week?" }, { "from": "gpt", "value": "The answer is 53" } ], "clip_score": 38.5625, "loss": 2.1853511333465576 }, { "id": "identity_181749", "image": "CLEVR-Math/images/CLEVR_train_003019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cylinders?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 27.6875, "loss": 2.5923426151275635 }, { "id": "identity_7888", "image": "DVQA/images/bar_train_00131191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 24.015625, "loss": 2.5393733978271484 }, { "id": "identity_87283", "image": "Geometry3K/images/171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, if the measure of angle ABC is 83.2 degrees, find the measure of angle BAD" }, { "from": "gpt", "value": "The answer is 83.2 degrees" } ], "clip_score": 34.8125, "loss": 1.0753709077835083 }, { "id": "identity_330650", "image": "Geometry3K/images/1149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given \\(\\frac { I J } { X J } = \\frac { H J } { Y J }\\), \\(m \\angle W X J = 130^\\circ\\), and \\(m \\angle W Z G = 20^\\circ\\), find \\(m \\angle J H G\\).\n\nChoices:\n(A) 100\n(B) 110\n(C) 120\n(D) 130" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.671875, "loss": 2.3911843299865723 }, { "id": "identity_169004", "image": "IconQA/images/37176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of diamonds in both rows?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 28.953125, "loss": 1.842077612876892 }, { "id": "identity_311831", "image": "ScienceQA/images/5372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Rhode Island\n(B) Tennessee\n(C) New Hampshire\n(D) Florida" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.8125, "loss": 4.468363285064697 }, { "id": "identity_3841", "image": "A-OKVQA/images/125476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What design is on each arm?\nChoices:\n(A) cat\n(B) eagles\n(C) dog\n(D) bat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.53125, "loss": 4.0966572761535645 }, { "id": "identity_236969", "image": "VizWiz/images/VizWiz_train_00017124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible settings on the dial, and which setting is currently selected?" }, { "from": "gpt", "value": "The answer is Off, Low, High, Warm; currently set to High" } ], "clip_score": 18.84375, "loss": 2.7365312576293945 }, { "id": "identity_300047", "image": "FigureQA/images/42047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson the highest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.03125, "loss": 3.8780360221862793 }, { "id": "identity_110913", "image": "VizWiz/images/VizWiz_train_00002718.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of baked good is shown in the image?" }, { "from": "gpt", "value": "The answer is Chocolate muffin" } ], "clip_score": 30.78125, "loss": 2.248878002166748 }, { "id": "identity_174177", "image": "IconQA/images/80899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dots are in the second row?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.6875, "loss": 2.4685637950897217 }, { "id": "identity_105007", "image": "VQA2.0/images/67116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the couch in the room?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 22.375, "loss": 3.7214951515197754 }, { "id": "identity_256106", "image": "ScienceQA/images/20978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the left magnet is rotated 180 degrees, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is attract" } ], "clip_score": 21.25, "loss": 3.1039998531341553 }, { "id": "identity_128756", "image": "A-OKVQA/images/264567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the horses in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Brown and white" } ], "clip_score": 25.546875, "loss": 2.843381404876709 }, { "id": "identity_304701", "image": "VQA-AS/images/10212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a frog on something?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.671875, "loss": 3.6075339317321777 }, { "id": "identity_103194", "image": "CLEVR-Math/images/CLEVR_train_004657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are shiny?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.1875, "loss": 2.579963445663452 }, { "id": "identity_302999", "image": "TabMWP/images/3019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each twig has 10 beetles. How many beetles are on some twigs?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 35.59375, "loss": 1.9332106113433838 }, { "id": "identity_280222", "image": "DVQA/images/bar_train_00090830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the market category, how many people prefer the speed of the object?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.578125, "loss": 2.3793256282806396 }, { "id": "identity_122237", "image": "PMC-VQA/images/PMC7699868_nutrients-12-03566-f004_309850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What cellular structures are predominantly stained in this image?" }, { "from": "gpt", "value": "The answer is Epithelial cells" } ], "clip_score": 26.203125, "loss": 2.173764228820801 }, { "id": "identity_263492", "image": "UniGeo/images/2466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given: In the figure, AB is a secant of circle O at point B, OA intersects circle O at point C, and point P is on circle O. If ∠BAC = 40°, what is the degree measure of ∠BPC?\nChoices:\n(A) 20°\n(B) 25°\n(C) 30°\n(D) 40°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 39.5, "loss": 2.2658379077911377 }, { "id": "identity_6463", "image": "DocVQA/images/ymhm0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 15.3203125, "loss": 4.075747489929199 }, { "id": "identity_548", "image": "VQA2.0/images/70363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it sunny?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.8828125, "loss": 3.6956939697265625 }, { "id": "identity_85365", "image": "Geometry3K/images/1592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the blue figure is reduced to have an area of 100 cm^2, what would be the new length of the side labeled 'x'?" }, { "from": "gpt", "value": "The answer is 4.53 cm" } ], "clip_score": 30.796875, "loss": 2.1426188945770264 }, { "id": "identity_175297", "image": "TabMWP/images/26028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of miles driven by Kenny from Thursday to Saturday?" }, { "from": "gpt", "value": "The answer is 1228 miles" } ], "clip_score": 37.4375, "loss": 1.9731372594833374 }, { "id": "identity_334227", "image": "GeoQA+/images/2127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the image, in circle O, if ∠AOB = 70°, then what is ∠ACB?\n\nChoices:\n(A) 70°\n(B) 140°\n(C) 35°\n(D) 30°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.03125, "loss": 2.8653671741485596 }, { "id": "identity_127142", "image": "A-OKVQA/images/317851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the motorcycle?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.71875, "loss": 3.3529129028320312 }, { "id": "identity_139974", "image": "DocVQA/images/pjxn0226_86.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of provider has the highest percentage of Hybrid procedures?" }, { "from": "gpt", "value": "The answer is Independent hospitals" } ], "clip_score": 23.234375, "loss": 3.0276434421539307 }, { "id": "identity_287299", "image": "ScienceQA/images/12301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which direction is the push directed in relation to the bottom magnet?\nChoices:\n(A) away from the bottom magnet\n(B) toward the bottom magnet" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.0360307693481445 }, { "id": "identity_288756", "image": "IconQA/images/40790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of squares present?\nChoices:\n(A) 7\n(B) 8\n(C) 6\n(D) 1\n(E) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.65625, "loss": 3.4866929054260254 }, { "id": "identity_79853", "image": "Geometry3K/images/813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are triangles LNM and NMP congruent when x = 2?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 34.125, "loss": 2.4236698150634766 }, { "id": "identity_294208", "image": "VQA-AS/images/12755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current location of the sun?" }, { "from": "gpt", "value": "The answer is behind cloud" } ], "clip_score": 26.453125, "loss": 3.758058547973633 }, { "id": "identity_37564", "image": "VQA2.0/images/456699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color shirt is the batter wearing?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 20.234375, "loss": 3.718043088912964 }, { "id": "identity_63483", "image": "ScienceQA/images/3269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the poles of the magnets in the image labeled with the same or different letters?" }, { "from": "gpt", "value": "The answer is Same letters on facing poles" } ], "clip_score": 20.34375, "loss": 3.7312240600585938 }, { "id": "identity_284786", "image": "A-OKVQA/images/362005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is the area where the man is skating made of? Is it:\n(A) metal\n(B) plastic\n(C) concrete\n(D) wood" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.25, "loss": 4.140772819519043 }, { "id": "identity_153165", "image": "ScienceQA/images/17823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the body of water located north of Australia?" }, { "from": "gpt", "value": "The answer is Arafura Sea" } ], "clip_score": 24.4375, "loss": 2.5331034660339355 }, { "id": "identity_267173", "image": "IconQA/images/81908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which has a greater quantity, pillows or sleeping bags?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.6875, "loss": 3.6443939208984375 }, { "id": "identity_134989", "image": "DocVQA/images/nyvx0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the Division Engineer at Rock Island Lines?" }, { "from": "gpt", "value": "The answer is Mr. T. W. Iael" } ], "clip_score": 20.640625, "loss": 4.170116424560547 }, { "id": "identity_73903", "image": "ScienceQA/images/12272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the Gulf of Mexico as shown on this map?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 33.46875, "loss": 3.5046088695526123 }, { "id": "identity_309172", "image": "FigureQA/images/21461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color less than that color?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.890625, "loss": 3.963322401046753 }, { "id": "identity_188500", "image": "VizWiz/images/VizWiz_train_00007980.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the thermos?" }, { "from": "gpt", "value": "The answer is Blue flowers" } ], "clip_score": 30.328125, "loss": 3.138277053833008 }, { "id": "identity_95460", "image": "TabMWP/images/17331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of all the events scheduled after 5 P.M.?" }, { "from": "gpt", "value": "The answer is 4 hours and 15 minutes" } ], "clip_score": 24.734375, "loss": 1.4774694442749023 }, { "id": "identity_321772", "image": "DocVQA/images/xhhk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall amount?" }, { "from": "gpt", "value": "The answer is 89.65" } ], "clip_score": 16.125, "loss": 2.3969132900238037 }, { "id": "identity_229167", "image": "UniGeo/images/1579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在四边形ABCD中,AD∥BC,AC,BD交于点O,过点O作EF∥AD交AB于点E,F,若AE=2,BE=5,OD=3,若EF=6,则BD长为多少?" }, { "from": "gpt", "value": "The answer is 10.5" } ], "clip_score": 24.953125, "loss": 1.9771196842193604 }, { "id": "identity_58768", "image": "FigureQA/images/77969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points does Mint have above the value of 50?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 26.9375, "loss": 2.2456724643707275 }, { "id": "identity_321805", "image": "MapQA/images/map_12233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have the lowest value in the Northeast?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.875, "loss": 2.6837687492370605 }, { "id": "identity_44518", "image": "FigureQA/images/73685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value reached by the Crimson line?" }, { "from": "gpt", "value": "The answer is Approximately 48" } ], "clip_score": 24.671875, "loss": 2.842083215713501 }, { "id": "identity_280482", "image": "TextVQA/images/cd4f7cee4adb50b6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By whom was this book written?" }, { "from": "gpt", "value": "The answer is fritz leiber" } ], "clip_score": 27.125, "loss": 3.2123851776123047 }, { "id": "identity_267303", "image": "IconQA/images/46108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of the shape that is colored pink? Is it (A) 1/11, (B) 1/9, (C) 1/4, or (D) 3/10?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.5, "loss": 2.7754111289978027 }, { "id": "identity_330393", "image": "A-OKVQA/images/33647.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many months until the leaves change color?\nChoices:\n(A) 3-4\n(B) 1-2\n(C) 5-7\n(D) 10-12" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.65625, "loss": 3.6384873390197754 }, { "id": "identity_313679", "image": "TQA/images/question_images/human_system_digestive_3694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many components of the digestive system are shown?\nChoices:\n(A) 18\n(B) 16\n(C) 14\n(D) 20" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.25, "loss": 3.059636354446411 }, { "id": "identity_108497", "image": "Super-CLEVR/images/superCLEVR_new_001511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the yellow car?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 23.671875, "loss": 3.6967408657073975 }, { "id": "identity_205973", "image": "VQA2.0/images/7746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many whole cakes are visible on the table?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 24.921875, "loss": 4.707282066345215 }, { "id": "identity_231445", "image": "FigureQA/images/29043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what percentage is Turquoise greater than Light Green?" }, { "from": "gpt", "value": "The answer is 200%" } ], "clip_score": 39.09375, "loss": 1.444637417793274 }, { "id": "identity_188360", "image": "PMC-VQA/images/PMC7523872_F1_262817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is depicted in the image?" }, { "from": "gpt", "value": "The answer is Pelvic region" } ], "clip_score": 23.5, "loss": 3.4529128074645996 }, { "id": "identity_78810", "image": "TQA/images/question_images/human_system_respiratory_3585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the small air sacs labeled at the end of the bronchi?" }, { "from": "gpt", "value": "The answer is Alveoli" } ], "clip_score": 25.71875, "loss": 2.3183491230010986 }, { "id": "identity_88396", "image": "UniGeo/images/2743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if ∠OBC = 50°, what is the length of arc BC?" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 35.75, "loss": 2.432971239089966 }, { "id": "identity_44968", "image": "FigureQA/images/10218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have more than two colors represented?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.0625, "loss": 3.7053310871124268 }, { "id": "identity_259335", "image": "PMC-VQA/images/PMC6814365_fig1s1_56521.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average voltage across the regions of interest (ROI) if the average \\( T_m \\) across ROI 1 is 1.6 ns and across ROI 2 is 1.7 ns, given that \\( b = 1.5 \\) ns and \\( m = 0.01 \\) ns/mV?" }, { "from": "gpt", "value": "The answer is 15 mV" } ], "clip_score": 22.8125, "loss": 2.2077298164367676 }, { "id": "identity_67323", "image": "TQA/images/question_images/faults_1735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the surface line where the fault occurs?" }, { "from": "gpt", "value": "The answer is Fault trace" } ], "clip_score": 33.1875, "loss": 3.427691698074341 }, { "id": "identity_172692", "image": "TabMWP/images/29994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy 1 pound of each type of orange listed, how much will the total cost be?" }, { "from": "gpt", "value": "The answer is $14" } ], "clip_score": 35.40625, "loss": 2.4450840950012207 }, { "id": "identity_59569", "image": "ChartQA/images/multi_col_80072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of respondents who would watch less across all categories?" }, { "from": "gpt", "value": "The answer is 85%" } ], "clip_score": 31.484375, "loss": 1.9555063247680664 }, { "id": "identity_267747", "image": "GeoQA+/images/5635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, if regular pentagon ABCDE is inscribed in circle O, what is the degree measure of angle ABD?\nChoices:\n(A) 60°\n(B) 72°\n(C) 78°\n(D) 144°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.0, "loss": 2.7002205848693848 }, { "id": "identity_200083", "image": "A-OKVQA/images/434354.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing glasses?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 12.7578125, "loss": 2.5914971828460693 }, { "id": "identity_61721", "image": "DocVQA/images/pslg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hours worked by all employees listed in the document?" }, { "from": "gpt", "value": "The answer is 115.6 hours" } ], "clip_score": 22.0625, "loss": 2.0886545181274414 }, { "id": "identity_70737", "image": "ScienceQA/images/1653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of these regions is the farthest east?" }, { "from": "gpt", "value": "The answer is The top right region" } ], "clip_score": 26.796875, "loss": 3.91180157661438 }, { "id": "identity_116692", "image": "Super-CLEVR/images/superCLEVR_new_001342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.71875, "loss": 2.022801399230957 }, { "id": "identity_213879", "image": "PMC-VQA/images/PMC3457007_F5_156995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct colored regions can be observed in section D?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.265625, "loss": 4.380340576171875 }, { "id": "identity_41856", "image": "FigureQA/images/72454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by Dark Red?" }, { "from": "gpt", "value": "The answer is Approximately 20" } ], "clip_score": 28.09375, "loss": 2.9180819988250732 }, { "id": "identity_312045", "image": "Geometry3K/images/697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If K L M N is a parallelogram, determine y.\nChoices:\n(A) 11\n(B) 17\n(C) 26\n(D) 28" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.3125, "loss": 2.811601161956787 }, { "id": "identity_130957", "image": "A-OKVQA/images/530278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the vase's handles?" }, { "from": "gpt", "value": "The answer is Loop-shaped" } ], "clip_score": 27.53125, "loss": 3.5051231384277344 }, { "id": "identity_129878", "image": "VQA2.0/images/94455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible motorcycles have a red color?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 25.09375, "loss": 2.4755403995513916 }, { "id": "identity_135845", "image": "FigureQA/images/6067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Blue section smaller than the Gray section?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.703125, "loss": 3.5204195976257324 }, { "id": "identity_53645", "image": "DocVQA/images/qnwg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode used for the \"SEND\" activity to remote station \"+41 31 9813188\"?" }, { "from": "gpt", "value": "The answer is EC" } ], "clip_score": 23.34375, "loss": 4.503419876098633 }, { "id": "identity_184727", "image": "A-OKVQA/images/498669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many oars does the boatman have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.984375, "loss": 3.932499408721924 }, { "id": "identity_118570", "image": "PMC-VQA/images/PMC6257712_FIG4_401193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality was used to obtain this image of the spine?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 35.5, "loss": 3.3496248722076416 }, { "id": "identity_320452", "image": "AI2D/images/4085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the mantle?\nChoices:\n(A) C\n(B) G\n(C) B\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.265625, "loss": 3.900411605834961 }, { "id": "identity_213778", "image": "VizWiz/images/VizWiz_train_00004665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the background?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.96875, "loss": 4.398008823394775 }, { "id": "identity_109387", "image": "PMC-VQA/images/PMC9348820_F1_363019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red markers are visible in the \"Ctrl\" confocal image?" }, { "from": "gpt", "value": "The answer is 12 markers" } ], "clip_score": 27.03125, "loss": 2.966231107711792 }, { "id": "identity_215597", "image": "A-OKVQA/images/10484.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the train that is heading away?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.6875, "loss": 3.7504444122314453 }, { "id": "identity_7872", "image": "GeoQA+/images/3670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,∠A=60°,BD⊥AC,垂足为D,CE⊥AB,垂足为E,O为BC的中点,连接OD、OE,则∠DOE的度数为()\nChoices:\n(A) 40°\n(B) 45°\n(C) 60°\n(D) 65°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.734375, "loss": 2.75732421875 }, { "id": "identity_104508", "image": "A-OKVQA/images/199640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorbikes are visible in the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.671875, "loss": 2.2168843746185303 }, { "id": "identity_263698", "image": "TQA/images/question_images/hair_follicles_6981.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following structures produces an oily substance similar to sebum? \n(A) bulge region\n(B) dermal papilla\n(C) epidermis\n(D) sebaceous gland" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.4375, "loss": 3.6090385913848877 }, { "id": "identity_287591", "image": "PMC-VQA/images/PMC2644319_F2_34592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, where is the mass situated?\nChoices:\n(A) left atrium\n(B) right atrium\n(C) left ventricle\n(D) right ventricle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.328125, "loss": 3.9355978965759277 }, { "id": "identity_274333", "image": "VQA-RAD/images/synpic39086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do hyper attenuating materials exist along the aortic walls?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.484375, "loss": 3.3465774059295654 }, { "id": "identity_317406", "image": "TextVQA/images/954a6c4bdd79d8b6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign?" }, { "from": "gpt", "value": "The answer is playaways!" } ], "clip_score": 27.609375, "loss": 2.4450528621673584 }, { "id": "identity_200115", "image": "TextVQA/images/2d406449b97759a7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organization is associated with this event?" }, { "from": "gpt", "value": "The answer is Northern Ireland Chest Heart & Stroke" } ], "clip_score": 19.59375, "loss": 2.4289028644561768 }, { "id": "identity_70814", "image": "TQA/images/question_images/seasons_1049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which position is the Earth farthest from the Sun?" }, { "from": "gpt", "value": "The answer is Position A" } ], "clip_score": 31.3125, "loss": 2.4754650592803955 }, { "id": "identity_162663", "image": "UniGeo/images/300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,BF平分∠ABC,CF平分∠ACB,若∠BFC=115°,则∠BAC的度数是多少?" }, { "from": "gpt", "value": "The answer is 32.5°" } ], "clip_score": 30.46875, "loss": 1.826528549194336 }, { "id": "identity_314141", "image": "Super-CLEVR/images/superCLEVR_new_001586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other large objects of the same shape as the large red one?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 14.796875, "loss": 5.218440532684326 }, { "id": "identity_180450", "image": "CLEVR-Math/images/CLEVR_train_026027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither metallic nor cylinders?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 27.84375, "loss": 2.627772331237793 }, { "id": "identity_68890", "image": "ScienceQA/images/1032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state located directly north of the highlighted state?" }, { "from": "gpt", "value": "The answer is Utah" } ], "clip_score": 32.40625, "loss": 2.8232479095458984 }, { "id": "identity_97622", "image": "TabMWP/images/3271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy one of each item, how much change will you receive from $200?" }, { "from": "gpt", "value": "The answer is $10" } ], "clip_score": 25.734375, "loss": 2.5136773586273193 }, { "id": "identity_97787", "image": "TabMWP/images/1030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average (mean) of the numbers in Pattern A?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 29.0, "loss": 2.3931446075439453 }, { "id": "identity_240912", "image": "IconQA/images/46882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each paper clip is 2 cm long, what is the total length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 27.0625, "loss": 1.8765894174575806 }, { "id": "identity_95296", "image": "IconQA/images/9974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of geometric shape is this?" }, { "from": "gpt", "value": "The answer is Cone" } ], "clip_score": 31.796875, "loss": 2.942441940307617 }, { "id": "identity_173996", "image": "IconQA/images/22134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the image of the shovel is folded along the dashed line, will both halves match perfectly?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.203125, "loss": 3.0140976905822754 }, { "id": "identity_11408", "image": "A-OKVQA/images/73093.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the boy doing?\nChoices:\n(A) pitching\n(B) sleeping\n(C) dancing\n(D) eating" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.0, "loss": 4.594607830047607 }, { "id": "identity_16260", "image": "A-OKVQA/images/183166.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a strange thing to see on a motorcycle?\nChoices:\n(A) kids driving\n(B) umbrella\n(C) dog\n(D) two sidecars" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.984375, "loss": 4.306586265563965 }, { "id": "identity_64712", "image": "AI2D/images/3490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organelle is responsible for energy production in the plant cell?" }, { "from": "gpt", "value": "The answer is Mitochondrion" } ], "clip_score": 30.71875, "loss": 1.2061007022857666 }, { "id": "identity_296071", "image": "TabMWP/images/1366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price difference between a parrot and a python?" }, { "from": "gpt", "value": "The answer is 296" } ], "clip_score": 24.8125, "loss": 2.948883533477783 }, { "id": "identity_228254", "image": "DocVQA/images/fnpy0000_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total quantity of cigarettes requested for both Winston Box and Winston Light Box, and what is the delivery deadline?" }, { "from": "gpt", "value": "The answer is 5,520 cigarettes, June 30, 1997" } ], "clip_score": 24.109375, "loss": 1.8926783800125122 }, { "id": "identity_82450", "image": "Geometry3K/images/556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x if the expressions on the vertical arrows are equal?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 31.546875, "loss": 2.471363067626953 }, { "id": "identity_228708", "image": "AI2D/images/3318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the label linked to the function of locomotion and explain how this structure contributes to the movement of the cell. Additionally, name another structure in the cell that works in conjunction with this labeled structure to facilitate movement" }, { "from": "gpt", "value": "The answer is Flagellum propels the cell; cytoskeleton supports flagellum movement" } ], "clip_score": 24.5, "loss": 2.9562788009643555 }, { "id": "identity_253597", "image": "DocVQA/images/rhhx0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total tax expense and the amount due after discount combined?" }, { "from": "gpt", "value": "The answer is $17,313.60" } ], "clip_score": 21.71875, "loss": 2.0097877979278564 }, { "id": "identity_321997", "image": "UniGeo/images/204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,AB∥CD,CD⊥EF,若∠1=125°,则∠2=()\n选项:\n(A) 25°\n(B) 35°\n(C) 55°\n(D) 65°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.15625, "loss": 2.7012240886688232 }, { "id": "identity_28571", "image": "DVQA/images/bar_train_00172652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first stack of bars from the bottom?" }, { "from": "gpt", "value": "The answer is fence" } ], "clip_score": 26.40625, "loss": 3.878819704055786 }, { "id": "identity_14325", "image": "Super-CLEVR/images/superCLEVR_new_001504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number of tiny brown rubber things have the same shape as the large purple shiny object?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 16.171875, "loss": 2.0252084732055664 }, { "id": "identity_241363", "image": "DVQA/images/bar_train_00080168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the product of the accuracies of the algorithms cheek and inch, and then subtract the accuracy of the algorithm maid from this product?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 35.375, "loss": 2.547396421432495 }, { "id": "identity_99535", "image": "TabMWP/images/4063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of kilometers walked during the past 7 days?" }, { "from": "gpt", "value": "The answer is 50 kilometers" } ], "clip_score": 24.84375, "loss": 1.8281370401382446 }, { "id": "identity_132206", "image": "TextVQA/images/4748e3595f9984b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any flags being displayed in the image? If so, how many?" }, { "from": "gpt", "value": "The answer is Yes, multiple flags" } ], "clip_score": 15.8046875, "loss": 3.5217862129211426 }, { "id": "identity_274210", "image": "TabMWP/images/30089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals in Audrey's P.E. class were able to perform a minimum of 50 push-ups but less than 60 push-ups in the push-up competition?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.578125, "loss": 2.1230180263519287 }, { "id": "identity_159921", "image": "GeoQA+/images/1717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of angles MAB and ABN?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 32.96875, "loss": 1.52646803855896 }, { "id": "identity_25765", "image": "ScienceQA/images/13775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Arizona?\nChoices:\n(A) Tucson\n(B) Phoenix\n(C) Providence\n(D) Anchorage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.296875, "loss": 4.373635292053223 }, { "id": "identity_125596", "image": "PMC-VQA/images/PMC6701896_FIG2_25982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of bleeding in the brain in this image?" }, { "from": "gpt", "value": "The answer is No evidence of bleeding" } ], "clip_score": 30.890625, "loss": 3.21049165725708 }, { "id": "identity_236643", "image": "MapQA/images/map_10908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border New Mexico, which state has the highest value and what is that value?" }, { "from": "gpt", "value": "The answer is Arizona, Utah, Oklahoma, Texas with 60.5" } ], "clip_score": 23.65625, "loss": 2.7573037147521973 }, { "id": "identity_92024", "image": "TabMWP/images/550.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of students who picked biology and force and motion topics combined?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 26.421875, "loss": 2.338541030883789 }, { "id": "identity_134304", "image": "DVQA/images/bar_train_00002558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many datasets does the 'nobody' algorithm have an accuracy of 5 or higher?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.15625, "loss": 2.3967392444610596 }, { "id": "identity_285662", "image": "DVQA/images/bar_train_00111623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the level of precision of the algorithm \"hour\" within the dataset \"edge\"?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 30.84375, "loss": 2.393289804458618 }, { "id": "identity_288143", "image": "DocVQA/images/tgdd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the document?" }, { "from": "gpt", "value": "The answer is December 16, 1970" } ], "clip_score": 23.109375, "loss": 1.5204217433929443 }, { "id": "identity_287988", "image": "A-OKVQA/images/556278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the source of illumination for the street?\nChoices:\n(A) fire\n(B) sun\n(C) street lights\n(D) flashlights" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.703125, "loss": 4.760587215423584 }, { "id": "identity_276230", "image": "DVQA/images/bar_train_00186386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many groups of bars is there at least one bar with a value exceeding 7?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 18.140625, "loss": 3.8892035484313965 }, { "id": "identity_78177", "image": "AI2D/images/1447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What dates correspond to the Winter Solstice in the Southern Hemisphere?" }, { "from": "gpt", "value": "The answer is June 21-22" } ], "clip_score": 29.25, "loss": 1.5542911291122437 }, { "id": "identity_273600", "image": "TextVQA/images/76248bbeb3817183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For which year is this poster intended?" }, { "from": "gpt", "value": "The answer is 1990" } ], "clip_score": 26.671875, "loss": 1.70290207862854 }, { "id": "identity_106151", "image": "VQA-RAD/images/synpic34836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any abnormality in the brain tissue?" }, { "from": "gpt", "value": "The answer is No obvious abnormalities identified" } ], "clip_score": 32.59375, "loss": 3.518428325653076 }, { "id": "identity_74319", "image": "ScienceQA/images/11623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is a decomposer in this food web?" }, { "from": "gpt", "value": "The answer is Mushroom" } ], "clip_score": 29.375, "loss": 2.4323348999023438 }, { "id": "identity_123920", "image": "VQA2.0/images/373128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles can be seen in the background?" }, { "from": "gpt", "value": "The answer is At least three" } ], "clip_score": 25.75, "loss": 3.9910967350006104 }, { "id": "identity_61753", "image": "DocVQA/images/zrlp0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many Saturday vacations were taken in May 1975?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 20.734375, "loss": 2.508413553237915 }, { "id": "identity_16956", "image": "ScienceQA/images/17361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Nevada?\nChoices:\n(A) Madison\n(B) Montpelier\n(C) Carson City\n(D) Cheyenne" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.90625, "loss": 4.250665187835693 }, { "id": "identity_179340", "image": "CLEVR-Math/images/CLEVR_train_044069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all metallic objects. How many objects are left?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.359375, "loss": 2.3279569149017334 }, { "id": "identity_131252", "image": "Super-CLEVR/images/superCLEVR_new_000922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image that share the same color as the train?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.328125, "loss": 3.3170571327209473 }, { "id": "identity_123730", "image": "PMC-VQA/images/PMC7519446_fig3_261632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique was used for this image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) stain" } ], "clip_score": 29.65625, "loss": 0.998196542263031 }, { "id": "identity_81404", "image": "Geometry3K/images/781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle VWX?" }, { "from": "gpt", "value": "The answer is 13 units" } ], "clip_score": 35.03125, "loss": 2.444338798522949 }, { "id": "identity_333952", "image": "AI2D/images/4329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the lychee shown as?\nChoices:\n(A) B\n(B) A\n(C) D\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.40625, "loss": 4.087706565856934 }, { "id": "identity_55709", "image": "FigureQA/images/97420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Maroon greater than 90 at x-axis label 40?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.046875, "loss": 2.9647085666656494 }, { "id": "identity_305788", "image": "PMC-VQA/images/PMC7027449_pd5591-fig-0001_120117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to detect the lesion?\nChoices:\n(A) X-ray \n(B) MRI \n(C) CT \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.46875, "loss": 4.058841705322266 }, { "id": "identity_65061", "image": "AI2D/images/3548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"Chloroplast\"?" }, { "from": "gpt", "value": "The answer is Photosynthesis" } ], "clip_score": 26.6875, "loss": 1.9188493490219116 }, { "id": "identity_223404", "image": "Super-CLEVR/images/superCLEVR_new_000534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle's front door?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 20.578125, "loss": 2.4994447231292725 }, { "id": "identity_47008", "image": "DocVQA/images/xrcw0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is Metabolic Effects of Menopausal Therapies" } ], "clip_score": 21.875, "loss": 2.6264569759368896 }, { "id": "identity_118800", "image": "A-OKVQA/images/561761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the kite closest to the top edge of the image?" }, { "from": "gpt", "value": "The answer is Red and blue" } ], "clip_score": 21.828125, "loss": 3.380504608154297 }, { "id": "identity_52497", "image": "FigureQA/images/32770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the title of the pie chart given?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.015625, "loss": 3.4337334632873535 }, { "id": "identity_215300", "image": "A-OKVQA/images/210241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number written on the side of the locomotive in the foreground?" }, { "from": "gpt", "value": "The answer is 5655" } ], "clip_score": 25.375, "loss": 1.6142703294754028 }, { "id": "identity_874", "image": "VizWiz/images/VizWiz_train_00000593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When does this expire?" }, { "from": "gpt", "value": "The answer is w" } ], "clip_score": 20.796875, "loss": 4.904585361480713 }, { "id": "identity_99989", "image": "TabMWP/images/9995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people surveyed?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 15.0078125, "loss": 2.2978432178497314 }, { "id": "identity_333085", "image": "A-OKVQA/images/26260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What species animal died for this meal?\nChoices:\n(A) capra\n(B) feline\n(C) canine\n(D) bovine" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.53125, "loss": 3.592324733734131 }, { "id": "identity_140870", "image": "DocVQA/images/pqll0006_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the author's position?" }, { "from": "gpt", "value": "The answer is Principal Scientist" } ], "clip_score": 20.234375, "loss": 2.982984781265259 }, { "id": "identity_236136", "image": "DVQA/images/bar_train_00120372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of all the categories shown in the bar chart?" }, { "from": "gpt", "value": "The answer is 65" } ], "clip_score": 32.125, "loss": 2.2921135425567627 }, { "id": "identity_157448", "image": "AI2D/images/346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the image, which life cycle includes a nymph stage?" }, { "from": "gpt", "value": "The answer is Grasshopper Life Cycle" } ], "clip_score": 35.0625, "loss": 1.2836527824401855 }, { "id": "identity_98226", "image": "IconQA/images/14319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many houses have a red door?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.484375, "loss": 2.34482741355896 }, { "id": "identity_303113", "image": "IconQA/images/98207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.9375, "loss": 4.107882022857666 }, { "id": "identity_43712", "image": "DVQA/images/bar_train_00069281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between the 'gang' store and the 'basis' store for the item 'right'?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.53125, "loss": 2.7792489528656006 }, { "id": "identity_279413", "image": "ScienceQA/images/9970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of New Jersey?\nChoices:\n(A) Wichita\n(B) Trenton\n(C) Knoxville\n(D) Providence" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.71875, "loss": 4.08508825302124 }, { "id": "identity_167142", "image": "TabMWP/images/26835.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bracelets have an even number of silver beads at Fairfax Jewelry?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 35.125, "loss": 2.0859928131103516 }, { "id": "identity_229403", "image": "FigureQA/images/97795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest average value over the entire range of the x-axis?" }, { "from": "gpt", "value": "The answer is Gold, Pale Green, and Medium Blue have the highest average value" } ], "clip_score": 31.546875, "loss": 2.0484158992767334 }, { "id": "identity_240828", "image": "TabMWP/images/20416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Carrie bought 5 pounds of Parmesan, 1 pound of ricotta, and 2 pounds of Swiss cheese, how much did she spend in total?" }, { "from": "gpt", "value": "The answer is $58.62" } ], "clip_score": 34.71875, "loss": 2.458134174346924 }, { "id": "identity_257991", "image": "PMC-VQA/images/PMC4563085_fig1_421918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the radiological features that differentiate pleural thickening from pleural effusion and consolidation in the left apical region?" }, { "from": "gpt", "value": "The answer is Pleural thickening" } ], "clip_score": 30.609375, "loss": 3.4905800819396973 }, { "id": "identity_324482", "image": "A-OKVQA/images/111940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many mufflers are there on this bike?\nChoices:\n(A) two\n(B) four\n(C) one\n(D) three" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.75, "loss": 3.4143154621124268 }, { "id": "identity_1227", "image": "TQA/images/question_images/earth_moon_phases_2553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the Moon located during the New Moon phase?\nChoices:\n(A) Partially between the Sun and the Earth\n(B) Behind the Sun\n(C) Between the Sun and the Earth\n(D) Behind the Earth" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.625, "loss": 4.280731201171875 }, { "id": "identity_284271", "image": "DocVQA/images/grxh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year was the method of analysis for turbidity issued?" }, { "from": "gpt", "value": "The answer is 1962" } ], "clip_score": 23.15625, "loss": 1.9164843559265137 }, { "id": "identity_251372", "image": "A-OKVQA/images/486415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the different types of food items present on the table and categorize them into fruits, vegetables, grains, and other food categories" }, { "from": "gpt", "value": "The answer is Fruits, grains, and other food categories" } ], "clip_score": 22.703125, "loss": 2.1964828968048096 }, { "id": "identity_263979", "image": "GeoQA+/images/338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, in quadrilateral ABCD, if angle B is 40° larger than angle A, what is the measure of angle D?\nChoices:\n(A) 60°\n(B) 70°\n(C) 100°\n(D) 110°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.46875, "loss": 2.4019651412963867 }, { "id": "identity_255931", "image": "A-OKVQA/images/559837.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the object that should be put in the hole near the nearby child, and how does it contribute to the environment shown in the image?" }, { "from": "gpt", "value": "The answer is To maintain cleanliness and hygiene in the public space" } ], "clip_score": 21.203125, "loss": 2.850175380706787 }, { "id": "identity_215732", "image": "TextVQA/images/37b20486199a4dc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the team's uniform?" }, { "from": "gpt", "value": "The answer is Blue and Yellow" } ], "clip_score": 24.109375, "loss": 2.896908760070801 }, { "id": "identity_230135", "image": "UniGeo/images/797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,AC=5,BC=12,AB=13,求sin∠B和cos∠B的值。" }, { "from": "gpt", "value": "The answer is sin∠B = 5/13 and cos∠B = 12/13" } ], "clip_score": 27.140625, "loss": 0.7644904255867004 }, { "id": "identity_322092", "image": "UniGeo/images/1340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O的半径为2,点O到直线l距离为3,点P是直线l上的一个动点,PQ切⊙O于点Q,则PQ的最小值是多少?\nChoices:\n(A) √{5}\n(B) √{13}\n(C) 2\n(D) 3" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.265625, "loss": 2.456101179122925 }, { "id": "identity_166973", "image": "TabMWP/images/3028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the children's weights?" }, { "from": "gpt", "value": "The answer is No mode" } ], "clip_score": 30.71875, "loss": 4.126391887664795 }, { "id": "identity_127802", "image": "VQA2.0/images/525318.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of object is to the left of the bottle in the photo on the phone screen?" }, { "from": "gpt", "value": "The answer is Paper or document" } ], "clip_score": 27.71875, "loss": 4.513705730438232 }, { "id": "identity_50326", "image": "FigureQA/images/13415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bar with the longest length?" }, { "from": "gpt", "value": "The answer is Magenta" } ], "clip_score": 27.5, "loss": 3.1445047855377197 }, { "id": "identity_204627", "image": "TextVQA/images/a76403bdedd2c6c2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What platform is this product designed for as indicated in the image?" }, { "from": "gpt", "value": "The answer is IBM" } ], "clip_score": 12.4140625, "loss": 3.3439109325408936 }, { "id": "identity_105469", "image": "TextVQA/images/c267f9e4b6079d9f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the release date of the movie?" }, { "from": "gpt", "value": "The answer is June 6" } ], "clip_score": 26.65625, "loss": 2.878110647201538 }, { "id": "identity_185236", "image": "PMC-VQA/images/PMC6331538_F6_422265.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of microscopy technique is likely used to capture this image?" }, { "from": "gpt", "value": "The answer is Fluorescence microscopy" } ], "clip_score": 27.421875, "loss": 1.347960114479065 }, { "id": "identity_335153", "image": "VQA-AS/images/13913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are they doing?" }, { "from": "gpt", "value": "The answer is relaxing" } ], "clip_score": 20.234375, "loss": 3.2505576610565186 }, { "id": "identity_24387", "image": "DocVQA/images/ntlg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the auth. no. of Frances Ellen Brock?" }, { "from": "gpt", "value": "The answer is 4754" } ], "clip_score": 20.609375, "loss": 2.651731491088867 }, { "id": "identity_267507", "image": "Geometry3K/images/1430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that triangle LMN is congruent to triangle QRS, what is the value of y?\nChoices:\n(A) 10\n(B) 20\n(C) 40\n(D) 50" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.015625, "loss": 3.1531786918640137 }, { "id": "identity_66452", "image": "TQA/images/question_images/em_spectrum_6819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which section of the electromagnetic spectrum has a frequency of 10^12 Hz?" }, { "from": "gpt", "value": "The answer is Gamma Ray" } ], "clip_score": 29.640625, "loss": 1.861268162727356 }, { "id": "identity_23379", "image": "PMC-VQA/images/PMC4520183_Fig2_410686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the FLAIR image show hyperintense lesions other than the right temporal lobe?\nChoices:\n(A) Frontal lobe \n(B) Left temporal lobe \n(C) No other area \n(D) Deep white matter " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.765625, "loss": 3.8893675804138184 }, { "id": "identity_114163", "image": "A-OKVQA/images/16403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vessel is visible in the background on the right?" }, { "from": "gpt", "value": "The answer is A boat or ship" } ], "clip_score": 15.25, "loss": 3.960841178894043 }, { "id": "identity_38479", "image": "ScienceQA/images/441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can Kayla and Janet trade to each get what they want?\nChoices:\n(A) Kayla can trade her tomatoes for Janet's carrots.\n(B) Kayla can trade her tomatoes for Janet's broccoli.\n(C) Janet can trade her broccoli for Kayla's oranges.\n(D) Janet can trade her almonds for Kayla's tomatoes." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.484375, "loss": 3.3488497734069824 }, { "id": "identity_297340", "image": "DVQA/images/bar_train_00120190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.390625, "loss": 4.253195285797119 }, { "id": "identity_283307", "image": "VQA-AS/images/14954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can be found in the grass?" }, { "from": "gpt", "value": "The answer is football, slide" } ], "clip_score": 28.09375, "loss": 3.7017033100128174 }, { "id": "identity_264678", "image": "FigureQA/images/76239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Web Gray or Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.625, "loss": 3.937004804611206 }, { "id": "identity_24444", "image": "TextVQA/images/664f1642eaebd56e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many years?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 12.46875, "loss": 1.735266089439392 }, { "id": "identity_150635", "image": "ScienceQA/images/5326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Would the object in the image take the shape of its container?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 16.390625, "loss": 3.0859923362731934 }, { "id": "identity_39213", "image": "PlotQA/images/99017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage points between the highest and lowest values for Mauritius?" }, { "from": "gpt", "value": "The answer is Approximately 10 percentage points" } ], "clip_score": 23.4375, "loss": 2.198394298553467 }, { "id": "identity_71704", "image": "ScienceQA/images/8633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At which longitude is the transition from negative to positive air temperatures observed?" }, { "from": "gpt", "value": "The answer is 0° longitude" } ], "clip_score": 22.875, "loss": 2.4122931957244873 }, { "id": "identity_167440", "image": "IconQA/images/21235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of yellow stripes to the total number of stripes on all marbles?" }, { "from": "gpt", "value": "The answer is 1/4" } ], "clip_score": 29.34375, "loss": 1.7863019704818726 }, { "id": "identity_17330", "image": "UniGeo/images/1161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC为⊙O的内接三角形,∠BOC=80°,则∠A等于()\nChoices:\n(A) 80\n(B) 60\n(C) 50\n(D) 40" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.0625, "loss": 2.7981934547424316 }, { "id": "identity_272671", "image": "UniGeo/images/1774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB is the diameter of circle O, CD is a chord. If ∠CAB = 65°, what is the measure of angle D?\nChoices:\n(A) 65°\n(B) 40°\n(C) 25°\n(D) 35°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.09375, "loss": 2.5564095973968506 }, { "id": "identity_31810", "image": "ScienceQA/images/15081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Delaware?\nChoices:\n(A) Wilmington\n(B) Augusta\n(C) Dover\n(D) Boston" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.0, "loss": 3.9838829040527344 }, { "id": "identity_147664", "image": "DocVQA/images/hzhd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the closing phrase used by the sender of the letter?" }, { "from": "gpt", "value": "The answer is With kindest regards" } ], "clip_score": 20.78125, "loss": 3.059420585632324 }, { "id": "identity_215306", "image": "A-OKVQA/images/536145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pedestrians in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.359375, "loss": 3.163269519805908 }, { "id": "identity_138623", "image": "DVQA/images/bar_train_00022091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference percentage between 'foam' in the 'minute' category and 'speech' in the 'sky' category?" }, { "from": "gpt", "value": "The answer is 60%" } ], "clip_score": 26.265625, "loss": 2.2027363777160645 }, { "id": "identity_65116", "image": "AI2D/images/1786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of food for the Cephalopod in this food web?" }, { "from": "gpt", "value": "The answer is Shrimp, Smaller fish, and Plankton" } ], "clip_score": 38.09375, "loss": 2.073232889175415 }, { "id": "identity_124289", "image": "PMC-VQA/images/PMC3915685_f1-mco-01-05-0851_264807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What condition is likely indicated by the findings in this MRI image?" }, { "from": "gpt", "value": "The answer is Ischemic stroke" } ], "clip_score": 35.75, "loss": 2.7881240844726562 }, { "id": "identity_59657", "image": "DVQA/images/bar_train_00023444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bars representing the \"bride\" category?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.28125, "loss": 2.540330171585083 }, { "id": "identity_250233", "image": "TabMWP/images/37423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Celine buys 3 kilograms of shank, 2 kilograms of brisket, 4 kilograms of rib, and 1 kilogram of round, how much will she spend in total?" }, { "from": "gpt", "value": "The answer is 20.02" } ], "clip_score": 25.40625, "loss": 2.7333414554595947 }, { "id": "identity_314686", "image": "A-OKVQA/images/329469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person on the right riding?\nChoices:\n(A) scooter\n(B) quad\n(C) bicycle\n(D) motorcycle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.0, "loss": 4.424347877502441 }, { "id": "identity_170663", "image": "TabMWP/images/28927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the y-values when x increases by 1?" }, { "from": "gpt", "value": "The answer is 6 and 2" } ], "clip_score": 29.0625, "loss": 2.810336112976074 }, { "id": "identity_34028", "image": "DocVQA/images/ffpw0079_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total amount of china's contribution ?" }, { "from": "gpt", "value": "The answer is $14,580" } ], "clip_score": 23.046875, "loss": 1.916191577911377 }, { "id": "identity_172514", "image": "TabMWP/images/18152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Doug decides to buy 2 pounds of raspberry jam and 3 pounds of marmalade instead, how much will he spend in total?" }, { "from": "gpt", "value": "The answer is $27" } ], "clip_score": 31.765625, "loss": 2.6852359771728516 }, { "id": "identity_171663", "image": "CLEVR-Math/images/CLEVR_train_037435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither green nor metallic?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.390625, "loss": 2.577249765396118 }, { "id": "identity_190813", "image": "VQA2.0/images/21212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Sunny" } ], "clip_score": 16.921875, "loss": 2.7454845905303955 }, { "id": "identity_29723", "image": "A-OKVQA/images/406139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the woman holding an umbrella?\nChoices:\n(A) block sun\n(B) staying dry\n(C) block wind\n(D) to dance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.390625, "loss": 4.336687088012695 }, { "id": "identity_271390", "image": "DocVQA/images/nfnw0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please provide me with the account number?" }, { "from": "gpt", "value": "The answer is da0608" } ], "clip_score": 18.765625, "loss": 4.02500057220459 }, { "id": "identity_148593", "image": "DocVQA/images/lfpf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fat is mentioned as an alternative to solid shortening?" }, { "from": "gpt", "value": "The answer is Polyunsaturated fat" } ], "clip_score": 17.3125, "loss": 2.508042097091675 }, { "id": "identity_114795", "image": "VizWiz/images/VizWiz_train_00005097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the telephone?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 27.5625, "loss": 3.403764486312866 }, { "id": "identity_114736", "image": "Super-CLEVR/images/superCLEVR_new_000224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are present in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.625, "loss": 2.3743362426757812 }, { "id": "identity_115333", "image": "VQA-AS/images/14148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is depicted in the painting on the wall?" }, { "from": "gpt", "value": "The answer is A landscape with hills and sky" } ], "clip_score": 21.8125, "loss": 3.4088542461395264 }, { "id": "identity_70463", "image": "ScienceQA/images/9878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states share a border with the highlighted state?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.5625, "loss": 2.2727103233337402 }, { "id": "identity_107242", "image": "VQA2.0/images/239001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is on the beach?" }, { "from": "gpt", "value": "The answer is A cow" } ], "clip_score": 27.71875, "loss": 3.2431552410125732 }, { "id": "identity_315645", "image": "VQA-RAD/images/synpic49027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is another pathology?" }, { "from": "gpt", "value": "The answer is moderate edema" } ], "clip_score": 26.71875, "loss": 3.771526575088501 }, { "id": "identity_167295", "image": "IconQA/images/6159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which picture is fifth in the sequence?" }, { "from": "gpt", "value": "The answer is A turtle" } ], "clip_score": 27.171875, "loss": 2.953885555267334 }, { "id": "identity_63470", "image": "ScienceQA/images/14928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state borders the highlighted state to the north?" }, { "from": "gpt", "value": "The answer is Missouri" } ], "clip_score": 31.65625, "loss": 3.4400177001953125 }, { "id": "identity_232723", "image": "TQA/images/question_images/velocity_time_graphs_8215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average speed of the object represented by the blue line from the start to the end of the graph?" }, { "from": "gpt", "value": "The answer is 1 unit per unit time" } ], "clip_score": 31.34375, "loss": 2.2512245178222656 }, { "id": "identity_143477", "image": "PlotQA/images/112973.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of days required to export and import goods in Costa Rica in 2012?" }, { "from": "gpt", "value": "The answer is 1 day" } ], "clip_score": 42.34375, "loss": 2.273085355758667 }, { "id": "identity_28196", "image": "DocVQA/images/skcv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Birth Date ?" }, { "from": "gpt", "value": "The answer is 12/29/11" } ], "clip_score": 19.875, "loss": 2.1675257682800293 }, { "id": "identity_16650", "image": "AI2D/images/1492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What follows evaporation?\nChoices:\n(A) none of the above\n(B) infiltration\n(C) condensation\n(D) percolation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.109375, "loss": 4.192748069763184 }, { "id": "identity_322209", "image": "DocVQA/images/hfhb0018_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What day of the week is this?" }, { "from": "gpt", "value": "The answer is Thursday" } ], "clip_score": 19.328125, "loss": 2.2739274501800537 }, { "id": "identity_117904", "image": "TextVQA/images/2c9b85d8357eae90.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total gold value of the items held by the characters on the \"Wight Brothers\" card?" }, { "from": "gpt", "value": "The answer is 800 gold pieces" } ], "clip_score": 23.109375, "loss": 1.8796007633209229 }, { "id": "identity_289754", "image": "PMC-VQA/images/PMC4579082_pbio.1002253.g002_425907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which muscle is visible in the transverse sections shown in panels (e,f)? \nOptions:\n(A) Soleus\n(B) Masseter\n(C) Biceps brachii\n(D) Pectoralis minor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.217895984649658 }, { "id": "identity_334024", "image": "PlotQA/images/32689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the amount of goods exported in one year to that in the following year?" }, { "from": "gpt", "value": "The answer is 1.351" } ], "clip_score": 25.84375, "loss": 2.129472017288208 }, { "id": "identity_113309", "image": "TextVQA/images/1949fb1c7ecd6bad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the hull of the boat?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.078125, "loss": 3.0407164096832275 }, { "id": "identity_238371", "image": "Super-CLEVR/images/superCLEVR_new_001996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many metal objects are either yellow road bikes, blue aeroplanes, or purple cars?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.328125, "loss": 2.564913749694824 }, { "id": "identity_38669", "image": "A-OKVQA/images/390658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are there blue lights on the trees?\nChoices:\n(A) for racing\n(B) for climbing\n(C) for holiday\n(D) to signal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.15625, "loss": 4.307885646820068 }, { "id": "identity_55458", "image": "FigureQA/images/2657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the proportion of Medium Aqua greater than Firebrick?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.828125, "loss": 3.580657482147217 }, { "id": "identity_217136", "image": "A-OKVQA/images/514563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cyclists are wearing helmets?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 28.734375, "loss": 4.631809711456299 }, { "id": "identity_150229", "image": "DocVQA/images/sxwg0227_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the institution where the responsible investigator is located?" }, { "from": "gpt", "value": "The answer is Medical College of Alabama, Birmingham Alabama" } ], "clip_score": 25.703125, "loss": 2.3907408714294434 }, { "id": "identity_137983", "image": "ChartQA/images/multi_col_40371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which decade saw the highest increase in waste generation and discards combined?" }, { "from": "gpt", "value": "The answer is 1980s" } ], "clip_score": 26.796875, "loss": 1.5255029201507568 }, { "id": "identity_312271", "image": "GeoQA+/images/5342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在三角形中,点D是BC上的一点,已知ÐDAC=30°,ÐDAB=75°,CE平分ÐACB交AB于点E,连接DE,则ÐDEC=()\n选择:\n(A) 10°\n(B) 15°\n(C) 20°\n(D) 25°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.359375, "loss": 2.525674343109131 }, { "id": "identity_248717", "image": "VQA-AS/images/7412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of objects are present in the image, and what are they?" }, { "from": "gpt", "value": "The answer is 9 types of objects: log, skateboard, sandbox, bucket, football, boy, girl, tree, sun" } ], "clip_score": 30.6875, "loss": 1.743804693222046 }, { "id": "identity_33473", "image": "TextVQA/images/4d2402ac5a0fce34.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what time is it?" }, { "from": "gpt", "value": "The answer is 10:15" } ], "clip_score": 15.4140625, "loss": 1.6335365772247314 }, { "id": "identity_152441", "image": "ScienceQA/images/7327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Kentucky" } ], "clip_score": 31.203125, "loss": 3.208327531814575 }, { "id": "identity_136546", "image": "FigureQA/images/75601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the y-axis label value where the Dark Blue line ends?" }, { "from": "gpt", "value": "The answer is 98.4" } ], "clip_score": 29.46875, "loss": 2.4332456588745117 }, { "id": "identity_239874", "image": "DVQA/images/bar_train_00106175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the values of the bars that have values larger than 2?" }, { "from": "gpt", "value": "The answer is 6.0" } ], "clip_score": 17.84375, "loss": 2.116804361343384 }, { "id": "identity_40028", "image": "DocVQA/images/xnvd0227_25.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the document title?" }, { "from": "gpt", "value": "The answer is Biochemical Data" } ], "clip_score": 23.390625, "loss": 4.009832382202148 }, { "id": "identity_156688", "image": "ScienceQA/images/18502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the temperature of the grill increased uniformly, what would be the expected final temperature of the salmon on the left half of the grill?" }, { "from": "gpt", "value": "The answer is 45°C" } ], "clip_score": 29.953125, "loss": 2.142911672592163 }, { "id": "identity_265045", "image": "PMC-VQA/images/PMC7417317_FIG2_232181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the MRI of the right shoulder?\nChoices:\n(A) A normal shoulder\n(B) Severe rotator cuff tendinopathy with a full thickness rotator cuff tear\n(C) Mild rotator cuff tendinopathy with a partial thickness rotator cuff tear\n(D) Biceps tendinitis of the shoulder." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.375, "loss": 4.621211051940918 }, { "id": "identity_208988", "image": "VQA2.0/images/565444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the motorcycle parked in a designated parking spot?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.015625, "loss": 3.1442370414733887 }, { "id": "identity_196610", "image": "PMC-VQA/images/PMC6919442_F1_86269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any nodules present in the left lung?" }, { "from": "gpt", "value": "The answer is Yes, there are nodules present" } ], "clip_score": 29.453125, "loss": 2.341106653213501 }, { "id": "identity_282781", "image": "PMC-VQA/images/PMC5139043_Fig3_102563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to clearly display the focal area of moderate FCH uptake in the right prostatic bed?\nChoices:\n(A) MRI\n(B) CT\n(C) PET\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.46875, "loss": 4.013251781463623 }, { "id": "identity_316705", "image": "ChartQA/images/multi_col_100100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the product of the highest and lowest value of enterprises that survived this long?" }, { "from": "gpt", "value": "The answer is 1841.98" } ], "clip_score": 14.734375, "loss": 2.609889268875122 }, { "id": "identity_279989", "image": "PMC-VQA/images/PMC7329524_Fig3_203295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the significance of the green arrow in the CT scan pointing to?\nChoices:\n(A) Bowel perforation \n(B) Ureteronephrosis \n(C) Hydronephrosis \n(D) Biliary obstruction." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.09375, "loss": 3.888817310333252 }, { "id": "identity_232965", "image": "DVQA/images/bar_train_00134731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of algorithm one across all datasets, and how does it compare to the average accuracy of algorithm two across all datasets?" }, { "from": "gpt", "value": "The answer is The average accuracy of algorithm one is 6.25, and it is 0.5 higher than the average accuracy of algorithm two" } ], "clip_score": 33.0, "loss": 1.5266813039779663 }, { "id": "identity_309992", "image": "Geometry3K/images/2059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For the trapezoid shown, M and P are midpoints of the legs. If the length of P M is 2 x, the length of Q R is 3 x, and the length of T S is 10, determine the value of P M.\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) 20" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.6875, "loss": 2.540060520172119 }, { "id": "identity_210338", "image": "A-OKVQA/images/252188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the base of the vase?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 25.421875, "loss": 4.303238391876221 }, { "id": "identity_160696", "image": "GeoQA+/images/5817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 29.28125, "loss": 1.7888209819793701 }, { "id": "identity_184758", "image": "PMC-VQA/images/PMC4643987_pone.0142956.g008_443715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 14.7109375, "loss": 2.6075453758239746 }, { "id": "identity_44314", "image": "FigureQA/images/96404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Cyan line consistently above the Dark Gold line throughout the graph?" }, { "from": "gpt", "value": "The answer is No, they intersect" } ], "clip_score": 28.0, "loss": 3.8076329231262207 }, { "id": "identity_332885", "image": "IconQA/images/90668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is depicted here?" }, { "from": "gpt", "value": "The answer is 2,850" } ], "clip_score": 23.796875, "loss": 2.8815691471099854 }, { "id": "identity_326439", "image": "DVQA/images/bar_train_00090684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have accuracy higher than 10 in at least one of these datasets?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 33.09375, "loss": 3.647134304046631 }, { "id": "identity_14801", "image": "ScienceQA/images/4299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which i in row B?\nChoices:\n(A) the theater\n(B) the fire department\n(C) the grocery store\n(D) the fast-food restaurant" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.625, "loss": 3.9950695037841797 }, { "id": "identity_95456", "image": "TabMWP/images/22581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in chocolate consumption per capita between the country with the highest consumption and the country with the lowest consumption in 2005?" }, { "from": "gpt", "value": "The answer is 7 kg" } ], "clip_score": 35.1875, "loss": 2.045849323272705 }, { "id": "identity_326335", "image": "GeoQA+/images/388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A set of triangles is arranged as shown in the image, with point A on edge EF and point D on edge BC. If EF is parallel to BC, what is the measure of ∠AOD?\n\nChoices:\n(A) 75°\n(B) 90°\n(C) 105°\n(D) 120°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.1875, "loss": 2.8549909591674805 }, { "id": "identity_294029", "image": "TabMWP/images/23697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which day did the city library observe the highest number of books being checked out?\nChoices:\n(A) Tuesday\n(B) Wednesday\n(C) Thursday\n(D) Friday" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.6875, "loss": 4.092177867889404 }, { "id": "identity_109392", "image": "VQA2.0/images/458374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the background color in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 12.7734375, "loss": 3.26538348197937 }, { "id": "identity_242224", "image": "DVQA/images/bar_train_00145287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer objects in the 'cent' category across all objects?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 23.796875, "loss": 2.324449300765991 }, { "id": "identity_282585", "image": "PMC-VQA/images/PMC6822922_FIG1_59149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the location of the osteoid osteoma in the patient's body? \nOptions:\n(A) Right tibia\n(B) Right femur\n(C) Left tibia\n(D) Left femur" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0, "loss": 4.670884609222412 }, { "id": "identity_80017", "image": "Geometry3K/images/1599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the isosceles triangle?" }, { "from": "gpt", "value": "The answer is 55.29 square units" } ], "clip_score": 36.65625, "loss": 2.163745880126953 }, { "id": "identity_121670", "image": "Super-CLEVR/images/superCLEVR_new_000936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the gold car relative to the green airplane?" }, { "from": "gpt", "value": "The answer is Behind and to the left" } ], "clip_score": 28.9375, "loss": 3.0803956985473633 }, { "id": "identity_222590", "image": "Super-CLEVR/images/superCLEVR_new_001295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles in the image?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 21.84375, "loss": 2.0832767486572266 }, { "id": "identity_28267", "image": "ScienceQA/images/18345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this time, thermal energy was transferred from () to ().\nChoices:\n(A) the surroundings . . . each bottle\n(B) each bottle . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.0, "loss": 3.8073501586914062 }, { "id": "identity_274540", "image": "VQA2.0/images/282703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is someone preparing to engage in a craft project?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 18.671875, "loss": 3.0593159198760986 }, { "id": "identity_86579", "image": "UniGeo/images/3173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of segment CD is 10 units, what is the length of segment BE?" }, { "from": "gpt", "value": "The answer is 15 units" } ], "clip_score": 27.09375, "loss": 1.9143810272216797 }, { "id": "identity_240705", "image": "Super-CLEVR/images/superCLEVR_new_000067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined color of the metallic truck and the green shiny motorbike?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 31.984375, "loss": 4.135743141174316 }, { "id": "identity_110222", "image": "VizWiz/images/VizWiz_train_00023808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object on the computer screen?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 24.65625, "loss": 4.341049671173096 }, { "id": "identity_146874", "image": "DocVQA/images/pfjh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date when the invoice was stamped?" }, { "from": "gpt", "value": "The answer is January 22, 1969" } ], "clip_score": 26.9375, "loss": 1.7912628650665283 }, { "id": "identity_2329", "image": "ScienceQA/images/17916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the text, how does a sloth's fur help protect it?\nChoices:\n(A) A sloth's fur protects its important organs.\n(B) A sloth's fur helps it hide from predators.\n(C) A sloth's fur helps it cling to tree branches." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.9375, "loss": 4.298039436340332 }, { "id": "identity_328140", "image": "AI2D/images/2627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, what is the temperature closest to the equator?\nChoices:\n(A) Cool\n(B) Cold\n(C) Hot\n(D) Warm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.40625, "loss": 4.038139820098877 }, { "id": "identity_41942", "image": "FigureQA/images/16473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Orchid occupy more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 40.84375, "loss": 3.1226110458374023 }, { "id": "identity_115575", "image": "TextVQA/images/e271d16f9a60e06e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the bookstore?" }, { "from": "gpt", "value": "The answer is Camilla's Bookshop" } ], "clip_score": 29.90625, "loss": 2.409299373626709 }, { "id": "identity_24930", "image": "ScienceQA/images/15123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property do these four objects have in common?\nChoices:\n(A) sticky\n(B) salty\n(C) stretchy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.640625, "loss": 4.086112022399902 }, { "id": "identity_194564", "image": "A-OKVQA/images/155997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible bicycle helmet in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.734375, "loss": 2.9976704120635986 }, { "id": "identity_141091", "image": "MapQA/images/map_8838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Florida have a higher value than Georgia?" }, { "from": "gpt", "value": "The answer is Yes, Florida has a higher value than Georgia" } ], "clip_score": 25.90625, "loss": 1.4508360624313354 }, { "id": "identity_121140", "image": "VQA2.0/images/558108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bird is flying in the image?" }, { "from": "gpt", "value": "The answer is Seagull" } ], "clip_score": 20.921875, "loss": 2.482318162918091 }, { "id": "identity_36666", "image": "A-OKVQA/images/314390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: This type of event should create what kind of mood for the people attending?\nChoices:\n(A) excited\n(B) joyous\n(C) bored\n(D) angry" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.3671875, "loss": 4.492593765258789 }, { "id": "identity_22432", "image": "PMC-VQA/images/PMC6022295_Fig2_334194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the specific finding on the abdominal CT? \nChoices:\n(A) multiple nodules \n(B) diverticulitis \n(C) ulcers \n(D) polypoid lesions " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.21875, "loss": 4.538163185119629 }, { "id": "identity_144730", "image": "DocVQA/images/sykk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different publications are mentioned on the page?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 18.25, "loss": 2.4067981243133545 }, { "id": "identity_146487", "image": "DVQA/images/bar_train_00104756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest accuracy value for the 'lie' dataset?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 30.796875, "loss": 2.4447648525238037 }, { "id": "identity_66317", "image": "ScienceQA/images/14357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of nuts can be seen in Shelley's lunch?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.3125, "loss": 2.2459590435028076 }, { "id": "identity_311856", "image": "UniGeo/images/661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,工地上竖着两根电线杆,分别自两杆上高出地面10m,15m的某处向两侧地面上的点处拉钢索将两杆固定,此时钢索的交点离地面的高度是()\nChoices:\n(A) 3m\n(B) 4m\n(C) 6m\n(D) 不能确定" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.4375, "loss": 2.550276279449463 }, { "id": "identity_170442", "image": "TabMWP/images/30023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cost per pound if you buy 1 pound of split washers and 2 pounds of rivets?" }, { "from": "gpt", "value": "The answer is $4.33" } ], "clip_score": 33.0625, "loss": 2.161785125732422 }, { "id": "identity_272782", "image": "TQA/images/question_images/faults_1738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, how many different types of movement are depicted?\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 1" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.921875, "loss": 3.167316436767578 }, { "id": "identity_293940", "image": "ScienceQA/images/10881.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive interaction with each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.71875, "loss": 3.9335293769836426 }, { "id": "identity_60541", "image": "ChartQA/images/multi_col_100761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of respondents aged 30-44 who are 'Very likely' to respond?" }, { "from": "gpt", "value": "The answer is 16%" } ], "clip_score": 25.40625, "loss": 1.9132654666900635 }, { "id": "identity_41650", "image": "FigureQA/images/30469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Steel Blue section in the pie chart?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 36.15625, "loss": 1.630035400390625 }, { "id": "identity_83058", "image": "Geometry3K/images/628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment AB?" }, { "from": "gpt", "value": "The answer is 28" } ], "clip_score": 31.671875, "loss": 2.9332563877105713 }, { "id": "identity_185090", "image": "A-OKVQA/images/62387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is advertised by the sign with the word \"FASHION\" on it?" }, { "from": "gpt", "value": "The answer is Clothing store" } ], "clip_score": 13.0546875, "loss": 2.5664944648742676 }, { "id": "identity_291239", "image": "A-OKVQA/images/345944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is seen floating in the water behind a stationary boat?\nChoices:\n(A) medicine ball\n(B) beach ball\n(C) anchor\n(D) safety float" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 11.34375, "loss": 3.7570693492889404 }, { "id": "identity_319706", "image": "PMC-VQA/images/PMC9247462_F5_328791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI signal did these lesions show?\nChoices:\n(A) T1-hyperintense\n(B) T2-hyperintense\n(C) T1-hypointense\n(D) T2-hypointense" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.625, "loss": 4.088490009307861 }, { "id": "identity_273791", "image": "AI2D/images/632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which channel serves as the connection between the magma chamber and the exit? \n(A) main vent/conduit\n(B) none of the above\n(C) crater\n(D) ash cloud" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.1875, "loss": 3.971364974975586 }, { "id": "identity_89060", "image": "GeoQA+/images/4464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠1 is 50°, what is the measure of ∠EFG in the folded rectangle?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 37.4375, "loss": 1.6572400331497192 }, { "id": "identity_202836", "image": "TextVQA/images/ff1f0006533d8e6e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the product type mentioned on the label?" }, { "from": "gpt", "value": "The answer is Foreign Extra" } ], "clip_score": 18.921875, "loss": 4.06306266784668 }, { "id": "identity_311053", "image": "A-OKVQA/images/117071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which way is the green arrow indicating to walk?\nChoices:\n(A) turn left\n(B) turn right\n(C) straight\n(D) turn around" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.953125, "loss": 4.0339837074279785 }, { "id": "identity_207520", "image": "PMC-VQA/images/PMC6694554_Fig1_23746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the mass compared to the surrounding tissue?" }, { "from": "gpt", "value": "The answer is The mass is more radiodense than the surrounding tissue" } ], "clip_score": 25.40625, "loss": 1.7826553583145142 }, { "id": "identity_61149", "image": "DVQA/images/bar_train_00191095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value represented by any pattern in the category \"mail\"?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 29.40625, "loss": 2.613734006881714 }, { "id": "identity_306068", "image": "A-OKVQA/images/507966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who provided these items?\nChoices:\n(A) beach owner\n(B) homeless people\n(C) salvation army\n(D) beach goers" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.171875, "loss": 4.538392066955566 }, { "id": "identity_271617", "image": "Geometry3K/images/1506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If line ED is tangent to circle F at point E, what is the value of x?\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.8125, "loss": 3.146179437637329 }, { "id": "identity_232143", "image": "FigureQA/images/92323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At x = 50, what is the difference between the values of Saddle Brown and Crimson?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.09375, "loss": 2.8606927394866943 }, { "id": "identity_19637", "image": "TabMWP/images/36504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.5625, "loss": 4.152300834655762 }, { "id": "identity_201478", "image": "A-OKVQA/images/406646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date stamped on the image?" }, { "from": "gpt", "value": "The answer is January 19, 2010" } ], "clip_score": 13.2578125, "loss": 1.5766980648040771 }, { "id": "identity_68489", "image": "TQA/images/question_images/human_system_eye_6135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled at the top right of the image?" }, { "from": "gpt", "value": "The answer is Sclera" } ], "clip_score": 30.6875, "loss": 2.560051441192627 }, { "id": "identity_174839", "image": "TabMWP/images/13516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stores have more than 70 pairs of shoes?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.1875, "loss": 2.3528730869293213 }, { "id": "identity_265214", "image": "VizWiz/images/VizWiz_train_00005558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do these fruits belong to the strawberry category?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 29.0625, "loss": 3.5518956184387207 }, { "id": "identity_19248", "image": "TQA/images/question_images/convection_of_air_8048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Conduction is the transfer of heat energy by\nChoices:\n(A) molecular contact\n(B) movement through a vacuum\n(C) density differences\n(D) electromagnetic waves" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.09375, "loss": 3.9798035621643066 }, { "id": "identity_51369", "image": "FigureQA/images/49439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 34.3125, "loss": 3.5444273948669434 }, { "id": "identity_222304", "image": "A-OKVQA/images/432657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.0, "loss": 2.4769155979156494 }, { "id": "identity_197791", "image": "VQA2.0/images/490518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.109375, "loss": 4.639772891998291 }, { "id": "identity_122649", "image": "A-OKVQA/images/26506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cows are visible in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 24.03125, "loss": 4.3045878410339355 }, { "id": "identity_278955", "image": "UniGeo/images/76.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, given that AB is parallel to DE, ∠ABC = 70°, and ∠CDE = 140°, what is the value of ∠BCD?\nChoices:\n(A) 20°\n(B) 30°\n(C) 40°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.78125, "loss": 2.3050479888916016 }, { "id": "identity_299388", "image": "TabMWP/images/33295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the daily new customer numbers observed by the cable company analyst?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 38.53125, "loss": 3.850383996963501 }, { "id": "identity_233717", "image": "TextVQA/images/5f2b56adf8d31c9e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct letters are visible on the sign on the right, and what is the first letter of the sign?" }, { "from": "gpt", "value": "The answer is 5 distinct letters, first letter is m" } ], "clip_score": 12.1796875, "loss": 2.5434722900390625 }, { "id": "identity_186842", "image": "VQA2.0/images/233904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the socks of the player with jersey number 11?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 21.03125, "loss": 3.8645331859588623 }, { "id": "identity_296122", "image": "VizWiz/images/VizWiz_train_00019467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please inform me of what this says?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 10.5234375, "loss": 3.855522394180298 }, { "id": "identity_209274", "image": "A-OKVQA/images/559902.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels does the cart have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.0, "loss": 3.984257459640503 }, { "id": "identity_111665", "image": "VizWiz/images/VizWiz_train_00012211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cups are visible on the table?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.9375, "loss": 2.4580495357513428 }, { "id": "identity_252126", "image": "TextVQA/images/14860b06ba90af45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the text and symbols on the back of the watch?" }, { "from": "gpt", "value": "The answer is Commemorates the Mille Miglia race route and key locations" } ], "clip_score": 28.9375, "loss": 4.1045098304748535 }, { "id": "identity_238753", "image": "UniGeo/images/1555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,AD平分∠BAC,按如下步骤作图:第一步,分别以点A、D为圆心,以大于\\(\\frac{1}{2}AD\\)的长为半径在AD两侧作弧,交于两点M、N;第二步,连接MN分别交AB、AC于点E、F;第三步,连接DE、DF.若BD=6,AF=4,CD=3,求△DEF的面积。" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.546875, "loss": 2.5035526752471924 }, { "id": "identity_285865", "image": "PlotQA/images/93454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name or heading of the X-axis?" }, { "from": "gpt", "value": "The answer is Years" } ], "clip_score": 22.921875, "loss": 3.7754111289978027 }, { "id": "identity_312040", "image": "TextVQA/images/dd2e26e4072c8551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand are these packets?" }, { "from": "gpt", "value": "The answer is herbies" } ], "clip_score": 22.640625, "loss": 3.4960553646087646 }, { "id": "identity_22649", "image": "A-OKVQA/images/236282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the women in the lower right doing?\nChoices:\n(A) texting\n(B) petting\n(C) eating\n(D) photographing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.6875, "loss": 4.219534397125244 }, { "id": "identity_17701", "image": "Geometry3K/images/1170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find X Y.\nChoices:\n(A) 3.5\n(B) 7\n(C) 34\n(D) 41" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.21875, "loss": 3.002363681793213 }, { "id": "identity_327143", "image": "PMC-VQA/images/PMC5503588_F3_188338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of radiograph is depicted in the image?\nChoices:\n(A) Lateral radiograph\n(B) Oblique radiograph\n(C) AP radiograph\n(D) PA radiograph" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.28125, "loss": 4.4720234870910645 }, { "id": "identity_72087", "image": "TQA/images/question_images/acid_rain_formation_6505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the image, what is the source of the acidic gases?" }, { "from": "gpt", "value": "The answer is Factory or industrial plant" } ], "clip_score": 23.828125, "loss": 3.18746280670166 }, { "id": "identity_329016", "image": "VQA2.0/images/102096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a water bottle attached to this?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.078125, "loss": 3.183962345123291 }, { "id": "identity_55401", "image": "FigureQA/images/32671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage value of the Crimson segment?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 31.140625, "loss": 1.7756894826889038 }, { "id": "identity_155671", "image": "ScienceQA/images/17087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Mary and Lisa were to combine their lists, how many food items would they have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.484375, "loss": 2.2157485485076904 }, { "id": "identity_317385", "image": "VQA2.0/images/352523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image in black and white?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.3828125, "loss": 3.4664559364318848 }, { "id": "identity_15511", "image": "TQA/images/abc_question_images/ocean_waves_17119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the distance between identical points in the adjacent cycles of a waveform signal?\nChoices:\n(A) E\n(B) X\n(C) K\n(D) P" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.0, "loss": 3.264754056930542 }, { "id": "identity_227554", "image": "IconQA/images/69152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more airplanes are there in the rightmost column compared to the leftmost column?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.015625, "loss": 2.515939950942993 }, { "id": "identity_273398", "image": "PlotQA/images/38380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which is greater: the difference in health care expenditure between Least Developed Countries and Middle East & North Africa (all income levels) in 1996, or the difference in health care expenditure between the same regions in 1995?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 37.125, "loss": 3.2618823051452637 }, { "id": "identity_69910", "image": "ScienceQA/images/2034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many points does the arrowhead have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 27.96875, "loss": 4.492909908294678 }, { "id": "identity_19054", "image": "TextVQA/images/33ab1264075bb427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the length of the ruler?" }, { "from": "gpt", "value": "The answer is 5 cm" } ], "clip_score": 19.171875, "loss": 2.0460450649261475 }, { "id": "identity_173911", "image": "TabMWP/images/11777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you have a budget of $20, how many pounds of pumpkin muffin mix can you purchase?" }, { "from": "gpt", "value": "The answer is 7.81 pounds" } ], "clip_score": 29.75, "loss": 2.239396095275879 }, { "id": "identity_331969", "image": "PMC-VQA/images/PMC5362993_Fig2_144559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the asterisk indicate in these scans?\nChoices:\n(A) Lung metastasis\n(B) Mediastinal adenopathy\n(C) Hilar vessels\n(D) Mediastinal vessels" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.75, "loss": 4.910020351409912 }, { "id": "identity_19864", "image": "TextVQA/images/c6bcf285912b386e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what kind of station is on the red sign?" }, { "from": "gpt", "value": "The answer is game trito station" } ], "clip_score": 20.765625, "loss": 4.25420618057251 }, { "id": "identity_201432", "image": "PMC-VQA/images/PMC5543595_Fig1_200059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is shown in this image?" }, { "from": "gpt", "value": "The answer is Thoracic region" } ], "clip_score": 28.390625, "loss": 2.5628013610839844 }, { "id": "identity_261181", "image": "TabMWP/images/22482.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of offices in the building?" }, { "from": "gpt", "value": "The answer is 911" } ], "clip_score": 29.671875, "loss": 2.776867151260376 }, { "id": "identity_281803", "image": "A-OKVQA/images/62387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At this moment, which direction can the cars moving forward potentially turn?\nChoices:\n(A) right\n(B) straight\n(C) u turn\n(D) left" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.4453125, "loss": 4.1383819580078125 }, { "id": "identity_6684", "image": "A-OKVQA/images/58684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who founded the company shown in the building?\nChoices:\n(A) bill gates\n(B) tony fauci\n(C) donald trump\n(D) steve jobs" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.625, "loss": 4.010335445404053 }, { "id": "identity_170302", "image": "TabMWP/images/6839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many parking lots had more than 50 cars?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.09375, "loss": 2.479860782623291 }, { "id": "identity_303955", "image": "GeoQA+/images/2810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点A,B,C在圆O上,∠ACB=35°,则∠AOB的度数是()\n选择:\n(A) 75°\n(B) 70°\n(C) 65°\n(D) 35°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.03125, "loss": 2.712026596069336 }, { "id": "identity_300840", "image": "Geometry3K/images/2088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle T.\nChoices:\n(A) 15\n(B) 35\n(C) 55\n(D) 60" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.375, "loss": 3.132678508758545 }, { "id": "identity_317734", "image": "ScienceQA/images/9142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can they trade to each get what they want?\nChoices:\n(A) One can trade broccoli for oranges.\n(B) One can trade tomatoes for broccoli.\n(C) One can trade almonds for tomatoes.\n(D) One can trade tomatoes for carrots." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.640625, "loss": 3.1395277976989746 }, { "id": "identity_108720", "image": "VQA2.0/images/516516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible logos or text on the snowboard?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.265625, "loss": 2.8368313312530518 }, { "id": "identity_207586", "image": "TextVQA/images/37a027b26e41793f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear are the individuals wearing?" }, { "from": "gpt", "value": "The answer is Sneakers" } ], "clip_score": 16.09375, "loss": 2.173165798187256 }, { "id": "identity_62714", "image": "ScienceQA/images/14119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly west of Virginia?" }, { "from": "gpt", "value": "The answer is West Virginia" } ], "clip_score": 26.40625, "loss": 2.323312997817993 }, { "id": "identity_177260", "image": "TabMWP/images/34784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the jeweler decides to stock 20% more gold bands based on last year's sales, how many gold bands should she stock this year?" }, { "from": "gpt", "value": "The answer is 924" } ], "clip_score": 28.109375, "loss": 2.580159902572632 }, { "id": "identity_286658", "image": "GeoQA+/images/2868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, three points A, B, C are known on circle O. Lines AB, AC, OB, OC are drawn. Tangent BD intersects the extension of OC at point D. If ∠A = 25°, what is the degree measure of ∠D?\nChoices:\n(A) 20°\n(B) 30°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.75, "loss": 2.385380268096924 }, { "id": "identity_152234", "image": "ScienceQA/images/19718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the final temperature of the water in both aquariums after the experiment?" }, { "from": "gpt", "value": "The answer is 16.0°C" } ], "clip_score": 32.5, "loss": 1.9049224853515625 }, { "id": "identity_38973", "image": "FigureQA/images/39338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 33.625, "loss": 3.5949454307556152 }, { "id": "identity_109415", "image": "PMC-VQA/images/PMC5746883_f3_258365.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the timestamp or date of the scan be seen in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.125, "loss": 2.600816249847412 }, { "id": "identity_326340", "image": "VizWiz/images/VizWiz_train_00002853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long do I cook this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.65625, "loss": 3.6606009006500244 }, { "id": "identity_157747", "image": "AI2D/images/1834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of Lemmings were to drastically decrease, which two organisms would be directly affected based on this food web?" }, { "from": "gpt", "value": "The answer is Arctic Foxes and Polar Bear" } ], "clip_score": 32.4375, "loss": 2.4250612258911133 }, { "id": "identity_161732", "image": "UniGeo/images/1068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle DAE?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 31.09375, "loss": 3.1263914108276367 }, { "id": "identity_239528", "image": "UniGeo/images/3290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,BD是⊙O的直径,弦AC⊥BD,垂足为E,∠AOB=60°,求∠ADE的大小。" }, { "from": "gpt", "value": "The answer is 120°" } ], "clip_score": 30.0, "loss": 1.587202548980713 }, { "id": "identity_161656", "image": "UniGeo/images/1001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若∠D=32°,则∠B的度数是多少?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.875, "loss": 1.862221598625183 }, { "id": "identity_168006", "image": "IconQA/images/102098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of the spinner landing on blue?" }, { "from": "gpt", "value": "The answer is 3/4 or 0.75" } ], "clip_score": 29.234375, "loss": 1.1734169721603394 }, { "id": "identity_220963", "image": "PMC-VQA/images/PMC5307799_Fig1_127460.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was likely used to capture this image?" }, { "from": "gpt", "value": "The answer is Microscopy" } ], "clip_score": 28.59375, "loss": 2.8811535835266113 }, { "id": "identity_277657", "image": "UniGeo/images/2513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the circle ⊙O with a radius of 5, chords AB and CD correspond to central angles ∠AOB and ∠COD, respectively. Given that AB = 8, ∠AOB + ∠COD = 180°, what is the chord height of CD? \nChoices:\n(A) \\frac{5}{2}\n(B) 3\n(C) \\frac{7}{2}\n(D) 4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 39.6875, "loss": 2.3021273612976074 }, { "id": "identity_261599", "image": "FigureQA/images/55097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Maroon or Dark Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.875, "loss": 4.428511619567871 }, { "id": "identity_152692", "image": "ScienceQA/images/154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what can be inferred about the magnetic field interaction?" }, { "from": "gpt", "value": "The answer is The magnetic fields will repel each other" } ], "clip_score": 21.140625, "loss": 3.2531824111938477 }, { "id": "identity_270605", "image": "ScienceQA/images/2477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force between them?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.671875, "loss": 3.647681713104248 }, { "id": "identity_99685", "image": "TabMWP/images/21411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which elementary school has the least number of students?" }, { "from": "gpt", "value": "The answer is Pine Elementary" } ], "clip_score": 31.0, "loss": 1.9587910175323486 }, { "id": "identity_220876", "image": "VQA2.0/images/73510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person standing on the vehicle wearing gloves?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.1875, "loss": 3.5773932933807373 }, { "id": "identity_191426", "image": "PMC-VQA/images/PMC5240324_Fig11_115021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.71875, "loss": 4.215090751647949 }, { "id": "identity_35281", "image": "VQA-AS/images/3206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What hangs is supposed to hang on the rack?" }, { "from": "gpt", "value": "The answer is coats" } ], "clip_score": 14.828125, "loss": 3.0758306980133057 }, { "id": "identity_219501", "image": "A-OKVQA/images/250427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the venue available for rent?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.765625, "loss": 2.9236347675323486 }, { "id": "identity_174915", "image": "TabMWP/images/12479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Henry has a coupon for $5 off any item. If he uses it on the pair of tennis shoes, how much will he pay for them?" }, { "from": "gpt", "value": "The answer is $14.00" } ], "clip_score": 30.265625, "loss": 1.8340914249420166 }, { "id": "identity_294888", "image": "MapQA/images/map_10744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the legend represented by a continuous bar?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 12.9140625, "loss": 3.4007811546325684 }, { "id": "identity_38725", "image": "A-OKVQA/images/263176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people ready to do?\nChoices:\n(A) eat\n(B) run\n(C) deplane\n(D) board" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.40625, "loss": 4.269223690032959 }, { "id": "identity_241190", "image": "TQA/images/abc_question_images/parts_skeleton_17271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the bones labeled R and T, and describe their anatomical relationship in terms of their position in the foot" }, { "from": "gpt", "value": "The answer is Calcaneus and Tarsals; Calcaneus is posterior to other Tarsals" } ], "clip_score": 30.109375, "loss": 2.6440622806549072 }, { "id": "identity_169775", "image": "TabMWP/images/21633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of voicemail messages stored on Jon's phone from Sunday to Thursday?" }, { "from": "gpt", "value": "The answer is 98" } ], "clip_score": 40.40625, "loss": 2.342219829559326 }, { "id": "identity_144943", "image": "FigureQA/images/48006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of the Deep Pink bar greater than the value of the Dark Gray bar?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.078125, "loss": 3.7678732872009277 }, { "id": "identity_76745", "image": "AI2D/images/1904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many secondary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 29.421875, "loss": 4.896237850189209 }, { "id": "identity_246959", "image": "DVQA/images/bar_train_00194325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of all bars labeled 'goal' across all groups?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 27.125, "loss": 2.0727899074554443 }, { "id": "identity_81380", "image": "UniGeo/images/673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is a diameter of the circle and ∠AOC = 120°, what is the length of arc AC?" }, { "from": "gpt", "value": "The answer is 10π/3" } ], "clip_score": 36.65625, "loss": 1.9644685983657837 }, { "id": "identity_331392", "image": "A-OKVQA/images/495117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where might the people sleep?\nChoices:\n(A) car\n(B) trailer\n(C) field\n(D) horse" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.46875, "loss": 4.3392510414123535 }, { "id": "identity_149129", "image": "FigureQA/images/21593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Yellow greater than the combined values of Chocolate and Web Green?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 35.5, "loss": 3.111820697784424 }, { "id": "identity_132209", "image": "Super-CLEVR/images/superCLEVR_new_001531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.21875, "loss": 2.3947763442993164 }, { "id": "identity_169724", "image": "IconQA/images/89698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of five tally marks are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.03125, "loss": 2.0648839473724365 }, { "id": "identity_239064", "image": "UniGeo/images/1341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,点C、D在⊙O上,∠CDB=20°,过点C作⊙O的切线交AB的延长线于点E,求∠ECA的大小。" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 29.28125, "loss": 1.8550186157226562 }, { "id": "identity_18300", "image": "VQA-AS/images/1849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Has the grill been abandoned?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 26.78125, "loss": 3.296156406402588 }, { "id": "identity_287288", "image": "VQA-AS/images/18059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of grills?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.703125, "loss": 1.9543794393539429 }, { "id": "identity_109091", "image": "A-OKVQA/images/458673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural style are the pillars designed in?" }, { "from": "gpt", "value": "The answer is Khmer architectural style" } ], "clip_score": 21.484375, "loss": 2.7143867015838623 }, { "id": "identity_41651", "image": "FigureQA/images/30469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Gold section in the pie chart?" }, { "from": "gpt", "value": "The answer is 80%" } ], "clip_score": 37.375, "loss": 1.877297043800354 }, { "id": "identity_127278", "image": "Super-CLEVR/images/superCLEVR_new_000417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more matte objects or shiny objects in the image?" }, { "from": "gpt", "value": "The answer is Matte" } ], "clip_score": 22.390625, "loss": 2.4940550327301025 }, { "id": "identity_184392", "image": "PMC-VQA/images/PMC8044440_f3_417269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence was likely used to obtain this image?" }, { "from": "gpt", "value": "The answer is T1-weighted MRI with contrast" } ], "clip_score": 33.34375, "loss": 1.6941968202590942 }, { "id": "identity_153122", "image": "ScienceQA/images/11929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is inscribed on the penny?" }, { "from": "gpt", "value": "The answer is 2001" } ], "clip_score": 35.3125, "loss": 1.723076343536377 }, { "id": "identity_244902", "image": "DocVQA/images/nxyc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many months after the survey period was the report published?" }, { "from": "gpt", "value": "The answer is 16 months" } ], "clip_score": 22.984375, "loss": 2.506680488586426 }, { "id": "identity_163197", "image": "GeoQA+/images/3270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若四边形ABCD是一个梯形,且对角线AC、BD的长都为20cm,且AD平行于BC,则四边形EFGH的高是多少厘米?" }, { "from": "gpt", "value": "The answer is √(20^2 - x^2/4)/2厘米" } ], "clip_score": 23.96875, "loss": 1.9793994426727295 }, { "id": "identity_10589", "image": "IconQA/images/22527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are on the frame?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 32.46875, "loss": 2.267171621322632 }, { "id": "identity_13921", "image": "TQA/images/question_images/faults_189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many types of wall are shown below?\nChoices:\n(A) 5\n(B) 4\n(C) 2\n(D) 3" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.203125, "loss": 3.2560811042785645 }, { "id": "identity_286062", "image": "PMC-VQA/images/PMC3522500_fig2_173028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was revealed near the skin on the CT scan? Was it a cyst, tumor, calcification, or lymph nodes?\nChoices:\n(A) cyst\n(B) tumor\n(C) calcification\n(D) lymph nodes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.59375, "loss": 4.318482398986816 }, { "id": "identity_144525", "image": "DocVQA/images/sfhg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the agenda for the meeting?" }, { "from": "gpt", "value": "The answer is Review of Procedure Notes" } ], "clip_score": 24.421875, "loss": 3.814504861831665 }, { "id": "identity_78391", "image": "ScienceQA/images/20878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of nails in the image?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 19.71875, "loss": 4.471961975097656 }, { "id": "identity_61658", "image": "DocVQA/images/srpf0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What types of transportation charges are covered under the TIAA Major Medical plan?" }, { "from": "gpt", "value": "The answer is Professional ambulance, railroad, commercial flights" } ], "clip_score": 21.125, "loss": 3.659341335296631 }, { "id": "identity_173403", "image": "IconQA/images/69171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the transformed letter G a mirror image of the original letter G?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.171875, "loss": 3.2708065509796143 }, { "id": "identity_108538", "image": "PMC-VQA/images/PMC5368922_Fig4_146103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Transaxial/Axial orientation" } ], "clip_score": 27.921875, "loss": 2.9955251216888428 }, { "id": "identity_68577", "image": "ScienceQA/images/8638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Sean and Jason were to trade items, which item could Jason offer that Sean does not have?" }, { "from": "gpt", "value": "The answer is a hot dog, tomatoes, or almonds" } ], "clip_score": 27.5625, "loss": 2.412529706954956 }, { "id": "identity_275281", "image": "ScienceQA/images/16244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Consider the magnetic force between the magnets in each pair. Which statement accurately describes the magnitude of the magnetic force in the given pairs? \nOptions:\n(A) The magnitude of the magnetic force is equal in both pairs.\n(B) The magnitude of the magnetic force is lesser in Pair 1.\n(C) The magnitude of the magnetic force is lesser in Pair 2." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.609375, "loss": 3.119673490524292 }, { "id": "identity_282379", "image": "TabMWP/images/5853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.8125, "loss": 4.440186023712158 }, { "id": "identity_106831", "image": "VQA2.0/images/579329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage is in the transparent glass in front of the person at the far end of the table?" }, { "from": "gpt", "value": "The answer is Beer" } ], "clip_score": 20.6875, "loss": 2.7142333984375 }, { "id": "identity_83302", "image": "Geometry3K/images/429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a sector is formed by a 60-degree central angle, what is the area of that sector?" }, { "from": "gpt", "value": "The answer is 3π/2 square inches" } ], "clip_score": 29.34375, "loss": 1.6767593622207642 }, { "id": "identity_32724", "image": "GeoQA+/images/1026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线AB∥CD,直线EF分别与直线AB,CD相交于点O与点G,OP平分∠EOB,若∠EOP=35°,则∠DGF的度数为()\nChoices:\n(A) 90°\n(B) 100°\n(C) 110°\n(D) 120°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.515625, "loss": 2.756129026412964 }, { "id": "identity_246498", "image": "ScienceQA/images/261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the physical properties of the material that make up the ring, and how do these properties classify it as a solid?" }, { "from": "gpt", "value": "The answer is The ring is made of a solid material with definite shape and volume" } ], "clip_score": 30.0625, "loss": 2.991436719894409 }, { "id": "identity_176313", "image": "TabMWP/images/12831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Malia plans to go on 20% more rides on her next visit on each day, how many rides will she go on Wednesday?" }, { "from": "gpt", "value": "The answer is 73.2" } ], "clip_score": 31.40625, "loss": 2.5660057067871094 }, { "id": "identity_182260", "image": "A-OKVQA/images/368515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car closest to the camera?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.359375, "loss": 4.500394344329834 }, { "id": "identity_38902", "image": "FigureQA/images/26252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of Brown to Light Gold in the pie chart?" }, { "from": "gpt", "value": "The answer is Brown:Light Gold > 1:1" } ], "clip_score": 38.75, "loss": 2.0286333560943604 }, { "id": "identity_281852", "image": "VQA-AS/images/18474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does a dog exist?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.046875, "loss": 3.6772360801696777 }, { "id": "identity_98927", "image": "IconQA/images/97642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares are there in the first row?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.203125, "loss": 1.9282963275909424 }, { "id": "identity_5861", "image": "DocVQA/images/xtkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the contract no. ?" }, { "from": "gpt", "value": "The answer is DA-49-007-MD 544" } ], "clip_score": 17.671875, "loss": 2.942650079727173 }, { "id": "identity_20941", "image": "MapQA/images/map_4170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value in the USA ?" }, { "from": "gpt", "value": "The answer is 4229-4252" } ], "clip_score": 28.125, "loss": 2.740593194961548 }, { "id": "identity_209324", "image": "PMC-VQA/images/PMC4567364_f03_422925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical term for the structure labeled \"Ur\" in the image?" }, { "from": "gpt", "value": "The answer is Urinary bladder" } ], "clip_score": 23.9375, "loss": 1.7717655897140503 }, { "id": "identity_140745", "image": "PlotQA/images/45336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the graph?" }, { "from": "gpt", "value": "The answer is Amount of CO2 emitted from gaseous fuel consumption" } ], "clip_score": 37.4375, "loss": 1.976732611656189 }, { "id": "identity_260088", "image": "PMC-VQA/images/PMC3699433_F6_215215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, identify the type of aortic dissection and describe the anatomical location and extent of the dissection" }, { "from": "gpt", "value": "The answer is Type III dissection in the descending aorta extending distally" } ], "clip_score": 28.15625, "loss": 3.022637128829956 }, { "id": "identity_252632", "image": "TextVQA/images/bf475d45b6ec7ef1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible on the field, and what are their jersey numbers?" }, { "from": "gpt", "value": "The answer is 6 players, jersey numbers 5, 29, 34, 7, 8, 3" } ], "clip_score": 17.828125, "loss": 1.8589073419570923 }, { "id": "identity_313958", "image": "VizWiz/images/VizWiz_train_00014962.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the calorie content?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 24.203125, "loss": 2.5284504890441895 }, { "id": "identity_82138", "image": "GeoQA+/images/4279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of segment DE in parallelogram ABCD is 2, what is the length of segment AE?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.34375, "loss": 2.297826051712036 }, { "id": "identity_282016", "image": "PMC-VQA/images/PMC4077832_F1_302660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging techniques were utilized for diagnosing the cardiac myxoma? \nOptions:\n(A) Echocardiography and MRI\n(B) Chest X-ray and CT scan\n(C) Echocardiography and CT scan\n(D) PET scan and X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.5, "loss": 3.2084922790527344 }, { "id": "identity_133521", "image": "TextVQA/images/2235bca399bfd395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the front of the green bus on the left?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 18.96875, "loss": 3.82904052734375 }, { "id": "identity_111336", "image": "TextVQA/images/6395cf6dd32cc27c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many camera lenses are visible on the phone?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 20.125, "loss": 4.6614460945129395 }, { "id": "identity_55084", "image": "FigureQA/images/31111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of the Dark Blue segment?" }, { "from": "gpt", "value": "The answer is 12.5%" } ], "clip_score": 25.46875, "loss": 1.5128239393234253 }, { "id": "identity_96842", "image": "IconQA/images/77970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more balls are there in the bottom row compared to the top row?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.96875, "loss": 2.210022211074829 }, { "id": "identity_291749", "image": "A-OKVQA/images/160597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what reason do safety workers choose to wear fluorescent colors?\nChoices:\n(A) visibility\n(B) style\n(C) tradition\n(D) cost-effective" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.3125, "loss": 4.008335590362549 }, { "id": "identity_2838", "image": "IconQA/images/55466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shapes are triangles?\nChoices:\n(A) 3/10\n(B) 2/5\n(C) 7/10\n(D) 3/4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.75, "loss": 3.4497992992401123 }, { "id": "identity_155767", "image": "ScienceQA/images/8962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the map?" }, { "from": "gpt", "value": "The answer is Iowa" } ], "clip_score": 32.28125, "loss": 2.7890918254852295 }, { "id": "identity_281437", "image": "AI2D/images/636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the cooling of molten rock?\nChoices:\n(A) Melting\n(B) Lava\n(C) Igneous Rock\n(D) Magma" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.765625, "loss": 3.926422595977783 }, { "id": "identity_112314", "image": "VQA2.0/images/240485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tank top the person on the right is wearing?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 16.640625, "loss": 3.7246947288513184 }, { "id": "identity_27001", "image": "GeoQA+/images/2242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,将纸片△ABC沿DE折叠使点A落在点A′处,若∠1=80°,∠2=24°,则∠A为()\nChoices:\n(A) 24°\n(B) 28°\n(C) 32°\n(D) 36°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.9375, "loss": 2.4739277362823486 }, { "id": "identity_329727", "image": "GeoQA+/images/3878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD内接于⊙O,E为DC延长线上一点.若∠BCE=105°,则∠BOD的度数是()\n选项:\n(A) 150°\n(B) 105°\n(C) 75°\n(D) 165°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.640625, "loss": 2.422518491744995 }, { "id": "identity_178174", "image": "IconQA/images/13564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of dots if one dot is added to each row?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 26.078125, "loss": 2.0486159324645996 }, { "id": "identity_137496", "image": "DVQA/images/bar_train_00086342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the \"iron\" category across all objects?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 26.203125, "loss": 2.547255277633667 }, { "id": "identity_219322", "image": "A-OKVQA/images/483817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Sunny" } ], "clip_score": 12.40625, "loss": 2.7236268520355225 }, { "id": "identity_43153", "image": "PlotQA/images/101401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value of cost of communications and computers as a percentage of service imports for Sudan within the given years?" }, { "from": "gpt", "value": "The answer is 2%" } ], "clip_score": 35.21875, "loss": 2.283388376235962 }, { "id": "identity_67959", "image": "ScienceQA/images/18622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the image?" }, { "from": "gpt", "value": "The answer is Arizona" } ], "clip_score": 33.71875, "loss": 2.868813991546631 }, { "id": "identity_14492", "image": "TabMWP/images/33167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Barry need to buy a bag of birdseed, a dog collar, and a cat toy?" }, { "from": "gpt", "value": "The answer is 8.44" } ], "clip_score": 33.5, "loss": 2.076423406600952 }, { "id": "identity_42919", "image": "ChartQA/images/multi_col_100851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of respondents who spent more money on Toiletries and Entertainment combined?" }, { "from": "gpt", "value": "The answer is 23%" } ], "clip_score": 29.390625, "loss": 1.775996208190918 }, { "id": "identity_29668", "image": "A-OKVQA/images/508722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the important part of this sport?\nChoices:\n(A) looks\n(B) fun\n(C) security\n(D) fame" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.375, "loss": 4.9054107666015625 }, { "id": "identity_265345", "image": "GeoQA+/images/247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, angle BAC is 40°, point P is the incenter of triangle ABC. What is the measure of angle BPC? \n(A) 80°\n(B) 110°\n(C) 130°\n(D) 140°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.25, "loss": 2.410461902618408 }, { "id": "identity_40823", "image": "DVQA/images/bar_train_00022114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label has the highest 'bone' value?" }, { "from": "gpt", "value": "The answer is hair" } ], "clip_score": 23.21875, "loss": 4.322378158569336 }, { "id": "identity_255310", "image": "A-OKVQA/images/484666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of services does the store that sells used appliances offer, and how can you identify this from the image?" }, { "from": "gpt", "value": "The answer is Pawning items and buying gold" } ], "clip_score": 24.609375, "loss": 3.7035207748413086 }, { "id": "identity_198404", "image": "PMC-VQA/images/PMC3599198_F2_192267.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is prominently visible in the center of the image?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 22.078125, "loss": 3.9532244205474854 }, { "id": "identity_37250", "image": "FigureQA/images/38285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Blue greater than Dark Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.96875, "loss": 3.5938162803649902 }, { "id": "identity_313941", "image": "TextVQA/images/c310febc40974a94.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which soda brand is being promoted?" }, { "from": "gpt", "value": "The answer is coca cola" } ], "clip_score": 25.171875, "loss": 2.027081251144409 }, { "id": "identity_128328", "image": "VizWiz/images/VizWiz_train_00014569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of exercises done according to the graph?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 27.578125, "loss": 2.2149903774261475 }, { "id": "identity_99767", "image": "CLEVR-Math/images/CLEVR_train_018741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cylinders?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.703125, "loss": 2.526444673538208 }, { "id": "identity_51768", "image": "DVQA/images/bar_train_00031564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'bird' dataset across all algorithms?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.546875, "loss": 2.539062738418579 }, { "id": "identity_159474", "image": "GeoQA+/images/4225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠1 = 56°, what is the measure of ∠D'FC'?" }, { "from": "gpt", "value": "The answer is 68°" } ], "clip_score": 33.375, "loss": 2.045389413833618 }, { "id": "identity_57292", "image": "DocVQA/images/ghhv0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is coffee allowed in the auditorium or upper lounge?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.984375, "loss": 2.626133918762207 }, { "id": "identity_47376", "image": "ChartQA/images/OECD_CHILD_VACCINATION_RATES_BRA_COL_CRI_DNK_EST_000016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much higher is Denmark's vaccination rate compared to Costa Rica?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 23.15625, "loss": 3.201223373413086 }, { "id": "identity_250455", "image": "ChartQA/images/16309028006002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total prevalence percentage of all the mental health and substance use disorders shown in the bar graph?" }, { "from": "gpt", "value": "The answer is 9.25%" } ], "clip_score": 30.578125, "loss": 1.7434934377670288 }, { "id": "identity_325794", "image": "DocVQA/images/zhmw0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is this person named Neal Ball?" }, { "from": "gpt", "value": "The answer is vice president" } ], "clip_score": 17.265625, "loss": 3.635042190551758 }, { "id": "identity_101175", "image": "TabMWP/images/18672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magazines have a number of pages that is a multiple of 5?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 19.734375, "loss": 2.6205551624298096 }, { "id": "identity_32491", "image": "DVQA/images/bar_train_00025614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of accuracies of the algorithm leg for all the datasets?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 30.28125, "loss": 2.548229455947876 }, { "id": "identity_65901", "image": "AI2D/images/2995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers are there between the Cork Cambium and the Pith?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 30.1875, "loss": 4.063802242279053 }, { "id": "identity_73284", "image": "AI2D/images/1394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many salivary glands are labeled in the diagram?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.40625, "loss": 2.4639060497283936 }, { "id": "identity_74445", "image": "ScienceQA/images/43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary function of these objects?" }, { "from": "gpt", "value": "The answer is Providing comfort" } ], "clip_score": 18.6875, "loss": 4.185186862945557 }, { "id": "identity_23835", "image": "DocVQA/images/xxhd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which Fund did not approve the request for support?" }, { "from": "gpt", "value": "The answer is The General Foods Fund" } ], "clip_score": 17.515625, "loss": 3.3902347087860107 }, { "id": "identity_170595", "image": "IconQA/images/49886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Linda started reading at 6:45 and the clock now shows 7:00, how long has she been reading?" }, { "from": "gpt", "value": "The answer is 15 minutes" } ], "clip_score": 30.296875, "loss": 1.526489019393921 }, { "id": "identity_223053", "image": "A-OKVQA/images/250001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the audience?" }, { "from": "gpt", "value": "The answer is 58" } ], "clip_score": 14.8203125, "loss": 2.4745192527770996 }, { "id": "identity_304950", "image": "ScienceQA/images/11651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Consider the magnetic force between the magnets in each pair. Which of the following statements is accurate?\nChoices:\n(A) The magnetic force is weaker in one pair.\n(B) The strength of the magnetic force is the same in both pairs.\n(C) The magnetic force is weaker in the other pair." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.4609375, "loss": 4.125746250152588 }, { "id": "identity_142905", "image": "PlotQA/images/57237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Italy have an increasing trend in adjusted net savings from 1970 to 1973?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.671875, "loss": 3.227543592453003 }, { "id": "identity_232291", "image": "TextVQA/images/0a9d5e54fcf25702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the year of minting and the ruler depicted on this 5 francs coin?" }, { "from": "gpt", "value": "The answer is 1813, Napoleon" } ], "clip_score": 33.71875, "loss": 2.1008379459381104 }, { "id": "identity_241549", "image": "AI2D/images/1659.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain the sequence of events that leads to a lunar eclipse, and identify the terms used to describe the different parts of the Earth's shadow involved in this process" }, { "from": "gpt", "value": "The answer is Umbra and penumbra" } ], "clip_score": 31.4375, "loss": 4.275752544403076 }, { "id": "identity_123136", "image": "VQA2.0/images/103287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phrase is written at the top of the sign next to the flowers?" }, { "from": "gpt", "value": "The answer is Say it with Flowers!" } ], "clip_score": 30.4375, "loss": 2.057046413421631 }, { "id": "identity_68794", "image": "ScienceQA/images/19696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the natural shape of the object in the image?" }, { "from": "gpt", "value": "The answer is Irregular natural shape" } ], "clip_score": 22.984375, "loss": 4.609912395477295 }, { "id": "identity_74236", "image": "AI2D/images/1802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of fish are depicted in the Onondaga Lake Food Web?" }, { "from": "gpt", "value": "The answer is Three types of fish" } ], "clip_score": 37.21875, "loss": 3.081084728240967 }, { "id": "identity_239334", "image": "FigureQA/images/92314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At which x-axis value does the Dark Blue line first become less than the Aqua line, and what are the corresponding y-axis values for both lines at that point?" }, { "from": "gpt", "value": "The answer is x = 0, Dark Blue = 40, Aqua = 70" } ], "clip_score": 32.59375, "loss": 1.6539058685302734 }, { "id": "identity_324488", "image": "PlotQA/images/17958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it the case that in every year, the sum of the net incurrence of foreign liabilities in some places is greater than the sum of net incurrence of foreign liabilities in other places?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.96875, "loss": 2.935478925704956 }, { "id": "identity_171462", "image": "IconQA/images/28102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is modified to have 5 equal sections with 3 green sections, what would be the new probability of landing on green?" }, { "from": "gpt", "value": "The answer is 3/5 or 60%" } ], "clip_score": 31.53125, "loss": 1.1150695085525513 }, { "id": "identity_95198", "image": "CLEVR-Math/images/CLEVR_train_002660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are represented in the objects?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.53125, "loss": 1.9478739500045776 }, { "id": "identity_72027", "image": "ScienceQA/images/2483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the European wildcat in the image showing any signs of movement or stillness?" }, { "from": "gpt", "value": "The answer is Stillness" } ], "clip_score": 34.78125, "loss": 2.697100877761841 }, { "id": "identity_42875", "image": "DVQA/images/bar_train_00163640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more people prefer 'fellow' in the 'farm' category compared to the 'attack' category?" }, { "from": "gpt", "value": "The answer is 5.5" } ], "clip_score": 25.90625, "loss": 1.8295342922210693 }, { "id": "identity_310975", "image": "AI2D/images/2016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are blackberries?\nChoices:\n(A) Producers\n(B) Tertiary Consumer\n(C) Primary Consumer\n(D) Secondary Consumer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.3671875, "loss": 4.035722732543945 }, { "id": "identity_320477", "image": "Geometry3K/images/192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle \\( \\widehat{T U} \\).\n\nChoices:\n(A) 11.5\n(B) 23\n(C) 46\n(D) 67" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.921875, "loss": 3.162799119949341 }, { "id": "identity_265125", "image": "Geometry3K/images/2014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nOptions:\n(A) 11.4\n(B) 22.6\n(C) 32.3\n(D) 43.2" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.15625, "loss": 2.852165937423706 }, { "id": "identity_82101", "image": "Geometry3K/images/1892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If JH is a midsegment of triangle KLM, and the length of KM is 10, what is the length of segment LM?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 36.5, "loss": 2.212217092514038 }, { "id": "identity_204047", "image": "PMC-VQA/images/PMC3867500_pone-0084115-g001_252779.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of cell abnormality or disease in this image?" }, { "from": "gpt", "value": "The answer is No indication of abnormality or disease" } ], "clip_score": 30.40625, "loss": 2.578425407409668 }, { "id": "identity_3031", "image": "MapQA/images/map_1817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does California have a lower value than Iowa ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.984375, "loss": 3.0549769401550293 }, { "id": "identity_75571", "image": "ScienceQA/images/16740.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any highlighted islands to the east of the main highlighted country?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.8125, "loss": 3.2789039611816406 }, { "id": "identity_336404", "image": "A-OKVQA/images/35711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many individuals in wetsuits are positioned before the crashing wave?\nChoices:\n(A) two\n(B) four\n(C) three\n(D) one" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.46875, "loss": 3.452249050140381 }, { "id": "identity_76187", "image": "AI2D/images/48.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which stage in the butterfly life cycle is represented by the caterpillar?" }, { "from": "gpt", "value": "The answer is Larva" } ], "clip_score": 40.71875, "loss": 2.611607313156128 }, { "id": "identity_257723", "image": "DocVQA/images/ghgw0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the appearance, aroma, and flavor characteristics of the substance described in the Description Field?" }, { "from": "gpt", "value": "The answer is Clear, colorless, syrupy, hygroscopic liquid; slight characteristic odor; bland sweet taste" } ], "clip_score": 21.40625, "loss": 2.1749730110168457 }, { "id": "identity_53570", "image": "MapQA/images/map_5052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with a percentage of representatives less than 85.6%?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.9375, "loss": 3.102250337600708 }, { "id": "identity_201718", "image": "PMC-VQA/images/PMC7003341_Fig2_111951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to create this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 30.796875, "loss": 2.4972281455993652 }, { "id": "identity_267125", "image": "FigureQA/images/12451.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Cyan or Medium Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.875, "loss": 4.442456245422363 }, { "id": "identity_161682", "image": "GeoQA+/images/3582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠BOD = 130° in the image, what is the measure of ∠ADB?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 32.03125, "loss": 1.8963947296142578 }, { "id": "identity_223889", "image": "TabMWP/images/22557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of problems on the tests that had at least 47 problems?" }, { "from": "gpt", "value": "The answer is 63.33" } ], "clip_score": 25.65625, "loss": 2.0202479362487793 }, { "id": "identity_43259", "image": "FigureQA/images/26205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Firebrick or Crimson have a larger segment in the pie chart?" }, { "from": "gpt", "value": "The answer is Crimson" } ], "clip_score": 37.65625, "loss": 1.980183482170105 }, { "id": "identity_225354", "image": "AI2D/images/3267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure labeled as \"Microvilli\" and describe its function in the cell" }, { "from": "gpt", "value": "The answer is Increase surface area for absorption" } ], "clip_score": 27.734375, "loss": 2.67322039604187 }, { "id": "identity_68714", "image": "TQA/images/question_images/parts_neuron_7217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the neuron is responsible for receiving signals?" }, { "from": "gpt", "value": "The answer is Dendrites" } ], "clip_score": 30.171875, "loss": 2.0177419185638428 }, { "id": "identity_185453", "image": "PMC-VQA/images/PMC7574823_FIG2_278559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of skull fracture in this CT scan?" }, { "from": "gpt", "value": "The answer is No, there are no signs of skull fracture" } ], "clip_score": 31.15625, "loss": 1.3661211729049683 }, { "id": "identity_314958", "image": "TQA/images/question_images/human_system_eye_6126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the eye varies in color among individuals?\nChoices:\n(A) Pupil\n(B) Cornea\n(C) Iris\n(D) Lens" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.5, "loss": 4.91095495223999 }, { "id": "identity_291254", "image": "TabMWP/images/580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of pizzas delivered by a pizza delivery driver over the past 7 days?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 32.34375, "loss": 2.4943950176239014 }, { "id": "identity_80658", "image": "Geometry3K/images/1521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of diameter PG is 14, find the length of radius AP" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 32.65625, "loss": 2.01692271232605 }, { "id": "identity_17788", "image": "IconQA/images/56568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) flip\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.265625, "loss": 4.173258304595947 }, { "id": "identity_16015", "image": "Geometry3K/images/1876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find m \\angle E.\nChoices:\n(A) 38\n(B) 76\n(C) 79\n(D) 158" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.390625, "loss": 2.295851707458496 }, { "id": "identity_122183", "image": "VQA2.0/images/293741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fuel is the train likely using to produce steam?" }, { "from": "gpt", "value": "The answer is Coal" } ], "clip_score": 25.078125, "loss": 2.709770441055298 }, { "id": "identity_237538", "image": "TextVQA/images/5448860ebad22135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the Gatorade flavors and their corresponding names and numbers from the image" }, { "from": "gpt", "value": "The answer is Sheriff 18, Blender 3, Megawatt 99, Middle Impact 52" } ], "clip_score": 30.796875, "loss": 3.5771658420562744 }, { "id": "identity_75468", "image": "AI2D/images/4243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many horizons are labeled in the soil profile diagram?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 38.0625, "loss": 2.1388676166534424 }, { "id": "identity_292578", "image": "PMC-VQA/images/PMC5613789_F7_219958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is the position of the sagittal slice displayed?\nChoices:\n(A) 30 mm to the left of the AC\n(B) 56.5 mm to the right of the AC\n(C) 30 mm to the right of the AC\n(D) 56.5 mm to the left of the AC" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.609375, "loss": 4.1226091384887695 }, { "id": "identity_260978", "image": "DVQA/images/bar_train_00092708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithms 'poetry' and 'worth' across all datasets?" }, { "from": "gpt", "value": "The answer is 5.17" } ], "clip_score": 35.15625, "loss": 2.151050090789795 }, { "id": "identity_181704", "image": "IconQA/images/91426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hours will it be before the clock shows 3 o'clock?" }, { "from": "gpt", "value": "The answer is 5 hours" } ], "clip_score": 31.21875, "loss": 2.2534830570220947 }, { "id": "identity_152165", "image": "ScienceQA/images/697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the cup?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 31.921875, "loss": 3.6526787281036377 }, { "id": "identity_308290", "image": "UniGeo/images/104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于一个圆,AD是圆的直径,∠ABC=25°,则∠CAD的度数为()\nChoices:\n(A) 25°\n(B) 50°\n(C) 65°\n(D) 75°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.640625, "loss": 2.5305285453796387 }, { "id": "identity_139312", "image": "DocVQA/images/qqgh0023_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ship to account number for this order?" }, { "from": "gpt", "value": "The answer is 678886" } ], "clip_score": 23.28125, "loss": 2.8859970569610596 }, { "id": "identity_281328", "image": "PMC-VQA/images/PMC5770999_Fig1_264430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the findings of the Chest CT scan? \nChoices:\n(A) Heart abnormalities \n(B) Asymmetrical arm muscle strength \n(C) Bilateral, asymmetric, ground-glass opacities and reticular shadows \n(D) Uneven bone density and structure." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.859375, "loss": 3.899549961090088 }, { "id": "identity_16035", "image": "DVQA/images/bar_train_00027458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of bars contain at least one bar with value greater than 4?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 22.859375, "loss": 4.116782188415527 }, { "id": "identity_93232", "image": "IconQA/images/40151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the new position of the top-left corner of the letter P after the transformation?" }, { "from": "gpt", "value": "The answer is 4 squares to the right and 3 squares down" } ], "clip_score": 31.96875, "loss": 1.8345891237258911 }, { "id": "identity_90346", "image": "GeoQA+/images/3086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C都在⊙O上,若∠A=44°,则∠ABC的度数为多少?" }, { "from": "gpt", "value": "The answer is 22°" } ], "clip_score": 31.28125, "loss": 2.381787061691284 }, { "id": "identity_231225", "image": "A-OKVQA/images/531146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the large windshield on the bus contribute to both the driver's visibility and passenger safety, and what are the potential benefits of this design in terms of accident prevention?" }, { "from": "gpt", "value": "The answer is Enhanced visibility and safety, reducing accident likelihood" } ], "clip_score": 24.328125, "loss": 4.105269908905029 }, { "id": "identity_229770", "image": "TextVQA/images/8774ea9366a4c286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different directions are indicated by the one-way signs, and what is the direction of the pedestrian crossing sign?" }, { "from": "gpt", "value": "The answer is One unique direction, left" } ], "clip_score": 33.09375, "loss": 5.130604267120361 }, { "id": "identity_58530", "image": "DocVQA/images/zsmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the document?" }, { "from": "gpt", "value": "The answer is 12-21-60" } ], "clip_score": 19.109375, "loss": 2.4586799144744873 }, { "id": "identity_152690", "image": "ScienceQA/images/154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What will be the interaction between the two magnets based on their poles?" }, { "from": "gpt", "value": "The answer is They will repel each other" } ], "clip_score": 21.828125, "loss": 2.4672958850860596 }, { "id": "identity_193304", "image": "PMC-VQA/images/PMC6813448_mbo3894-fig-0005_56317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible fruiting bodies on the lichen in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.046875, "loss": 3.017467975616455 }, { "id": "identity_13851", "image": "PMC-VQA/images/PMC5902959_Fig2_302322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which side of the brain are locations b and c in image a and locations e and f in image d? \nChoices:\n(A) Left side \n(B) Right side \n(C) Both sides \n(D) Neither side " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.6875, "loss": 3.733790874481201 }, { "id": "identity_76928", "image": "ScienceQA/images/10168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the air temperature at 40°N, 110°E?" }, { "from": "gpt", "value": "The answer is 25°C" } ], "clip_score": 16.640625, "loss": 1.8737874031066895 }, { "id": "identity_19775", "image": "VQA-AS/images/285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.96875, "loss": 2.4216184616088867 }, { "id": "identity_227549", "image": "Geometry3K/images/164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of angle 8" }, { "from": "gpt", "value": "The answer is 135°" } ], "clip_score": 34.3125, "loss": 1.9200886487960815 }, { "id": "identity_37569", "image": "Geometry3K/images/413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 114\n(B) 118\n(C) 121\n(D) 125" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 2.603404998779297 }, { "id": "identity_42202", "image": "PlotQA/images/13404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Iceland have the lowest number of refugees?" }, { "from": "gpt", "value": "The answer is 2007" } ], "clip_score": 29.75, "loss": 1.6129097938537598 }, { "id": "identity_215420", "image": "TextVQA/images/733b665856487060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any bottles with a portrait on the label?" }, { "from": "gpt", "value": "The answer is Yes, there are two bottles with portraits" } ], "clip_score": 23.359375, "loss": 1.9937347173690796 }, { "id": "identity_59756", "image": "FigureQA/images/59661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the larger portion of the pie chart?" }, { "from": "gpt", "value": "The answer is Forest Green" } ], "clip_score": 37.125, "loss": 3.1329143047332764 }, { "id": "identity_126342", "image": "PMC-VQA/images/PMC6267895_Fig6_404450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of any fractures or breaks in the bone?" }, { "from": "gpt", "value": "The answer is No evidence of fractures" } ], "clip_score": 22.46875, "loss": 3.061751127243042 }, { "id": "identity_73590", "image": "ScienceQA/images/7141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vegetables on Gabe's want list?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.875, "loss": 3.410802125930786 }, { "id": "identity_251337", "image": "UniGeo/images/2461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,正三角形的边长为6,点P为BC边上一点,且PC=4,D为AC上一点,∠APD=60°,求三角形APD的面积。" }, { "from": "gpt", "value": "The answer is \\(\\frac{4}{3}\\)" } ], "clip_score": 27.359375, "loss": 2.9127397537231445 }, { "id": "identity_170625", "image": "TabMWP/images/30635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest number of bags of popcorn sold in the past 4 days?" }, { "from": "gpt", "value": "The answer is 56" } ], "clip_score": 41.53125, "loss": 2.785719633102417 }, { "id": "identity_39876", "image": "DVQA/images/bar_train_00173978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who preferred the object lot in the category week?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.21875, "loss": 2.58500075340271 }, { "id": "identity_293694", "image": "A-OKVQA/images/554636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of business is being promoted on the building with white letters?\nChoices:\n(A) electronics store\n(B) sports retailer\n(C) food chain\n(D) repair shop" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 9.6640625, "loss": 4.17072057723999 }, { "id": "identity_301867", "image": "DocVQA/images/jmwn0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number at the top?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 16.796875, "loss": 2.282946825027466 }, { "id": "identity_249001", "image": "DocVQA/images/gtxv0068_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total pages does this telefax message contain, and what is the sender's telephone number?" }, { "from": "gpt", "value": "The answer is 72 pages, (336) 741-6303" } ], "clip_score": 26.890625, "loss": 2.484841823577881 }, { "id": "identity_223663", "image": "A-OKVQA/images/267205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the person in the center of the image riding?" }, { "from": "gpt", "value": "The answer is Bicycle" } ], "clip_score": 16.921875, "loss": 2.6180579662323 }, { "id": "identity_23116", "image": "VQA2.0/images/28853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of automobile is at the center of the photo?" }, { "from": "gpt", "value": "The answer is chevrolet" } ], "clip_score": 19.125, "loss": 2.7353057861328125 }, { "id": "identity_63677", "image": "ScienceQA/images/16500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 13.296875, "loss": 2.905662775039673 }, { "id": "identity_144642", "image": "DocVQA/images/tffb0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role of EPA is discussed in the proposal at 10:45 a.m.?" }, { "from": "gpt", "value": "The answer is Possible role of EPA in the proposal" } ], "clip_score": 21.265625, "loss": 3.2265899181365967 }, { "id": "identity_223853", "image": "FigureQA/images/82166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Rank all the colors from the minimum to the maximum area under the curve" }, { "from": "gpt", "value": "The answer is Light Salmon, Medium Turquoise, Medium Mint, Dark Gray, Mint" } ], "clip_score": 28.53125, "loss": 1.0763607025146484 }, { "id": "identity_85454", "image": "GeoQA+/images/461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle O, if AB is the diameter and angle BCD is 130 degrees, what is the measure of angle BAD?" }, { "from": "gpt", "value": "The answer is 80 degrees" } ], "clip_score": 34.46875, "loss": 2.0645809173583984 }, { "id": "identity_37119", "image": "A-OKVQA/images/236747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What work is done in this space?\nChoices:\n(A) coding\n(B) machine shop\n(C) cooking\n(D) typing" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.234375, "loss": 4.483128547668457 }, { "id": "identity_293475", "image": "VQA2.0/images/280241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the dog?" }, { "from": "gpt", "value": "The answer is front" } ], "clip_score": 17.0, "loss": 5.554227352142334 }, { "id": "identity_212993", "image": "PMC-VQA/images/PMC6905104_Fig5_82262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of label numbers visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 16.984375, "loss": 1.9600404500961304 }, { "id": "identity_103407", "image": "VQA2.0/images/396188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians are visible on the sidewalk to the left?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.5, "loss": 4.538660526275635 }, { "id": "identity_71484", "image": "ScienceQA/images/5347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which solution has a lower concentration of green particles?" }, { "from": "gpt", "value": "The answer is Solution A" } ], "clip_score": 32.46875, "loss": 2.198899269104004 }, { "id": "identity_264034", "image": "Geometry3K/images/738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram with rectangle LMNP and parallelogram LKMJ, if the measure of angle MJN is 35 degrees, what is the measure of angle MPN?\nChoices:\n(A) 17.5\n(B) 30\n(C) 35\n(D) 70" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.96875, "loss": 2.2483420372009277 }, { "id": "identity_328883", "image": "A-OKVQA/images/450716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these animals lowering their heads?\nChoices:\n(A) to walk\n(B) for petting\n(C) to comb\n(D) to eat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.5625, "loss": 3.5624237060546875 }, { "id": "identity_337159", "image": "Super-CLEVR/images/superCLEVR_new_001668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many metallic cruisers are on the right side of the yellow thing in front of the large green bike?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.828125, "loss": 2.5462706089019775 }, { "id": "identity_240225", "image": "DVQA/images/bar_train_00048443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for all items across all stores?" }, { "from": "gpt", "value": "The answer is 52 units" } ], "clip_score": 28.796875, "loss": 2.4219112396240234 }, { "id": "identity_211376", "image": "PMC-VQA/images/PMC5402788_F2_156284.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a fracture in the cervical vertebrae?" }, { "from": "gpt", "value": "The answer is No evidence of fracture visible" } ], "clip_score": 29.21875, "loss": 3.377246856689453 }, { "id": "identity_60616", "image": "DocVQA/images/hjpj0226_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is FOOD COLORING -- 2" } ], "clip_score": 20.296875, "loss": 2.5955822467803955 }, { "id": "identity_220278", "image": "TextVQA/images/a5988e070332060f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color scheme is used for the soap packaging?" }, { "from": "gpt", "value": "The answer is Brown and White" } ], "clip_score": 25.296875, "loss": 2.614919900894165 }, { "id": "identity_124573", "image": "VQA2.0/images/559747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the airplane currently parked at a gate or on the tarmac?" }, { "from": "gpt", "value": "The answer is Parked at a gate" } ], "clip_score": 25.53125, "loss": 1.885301113128662 }, { "id": "identity_157035", "image": "AI2D/images/4881.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the feature formed above the subduction zone on the continental plate?" }, { "from": "gpt", "value": "The answer is Volcanic Arc" } ], "clip_score": 31.4375, "loss": 2.1552317142486572 }, { "id": "identity_109225", "image": "TextVQA/images/eb4461d8ebebabcd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What complication is visible at the 6 o'clock position?" }, { "from": "gpt", "value": "The answer is Tourbillon" } ], "clip_score": 30.203125, "loss": 2.769479513168335 }, { "id": "identity_41180", "image": "FigureQA/images/60093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by the Firebrick line?" }, { "from": "gpt", "value": "The answer is 95" } ], "clip_score": 22.109375, "loss": 2.6479732990264893 }, { "id": "identity_56107", "image": "DVQA/images/bar_train_00143914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more people prefer 'knife' over 'horror'?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.78125, "loss": 2.187390089035034 }, { "id": "identity_140510", "image": "DocVQA/images/llcv0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the chairman of the workshop?" }, { "from": "gpt", "value": "The answer is Sir MacFarlane Burnet" } ], "clip_score": 24.046875, "loss": 2.199355125427246 }, { "id": "identity_193995", "image": "PMC-VQA/images/PMC4619722_pone.0140537.g005_437036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI image is shown in the figure?" }, { "from": "gpt", "value": "The answer is Functional MRI (fMRI)" } ], "clip_score": 32.5, "loss": 1.8235350847244263 }, { "id": "identity_90487", "image": "GeoQA+/images/2782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠COE = 65° and OE⊥AB, what is the measure of ∠BOE?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.65625, "loss": 1.5918660163879395 }, { "id": "identity_46738", "image": "DVQA/images/bar_train_00193866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of 'heat' values between 'talk' and 'device'?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 20.453125, "loss": 2.0131490230560303 }, { "id": "identity_252946", "image": "GeoQA+/images/1711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知⊙O的半径为10cm,弦AB的长为16cm,P是AB的延长线上一点,BP=4cm,求OP的平方加上AB的平方的值。" }, { "from": "gpt", "value": "The answer is 436" } ], "clip_score": 25.515625, "loss": 2.7744579315185547 }, { "id": "identity_64309", "image": "ScienceQA/images/18589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many continents are fully visible in this image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.03125, "loss": 2.4726696014404297 }, { "id": "identity_245965", "image": "A-OKVQA/images/201069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What services are most likely offered inside the building next to the cars, and what evidence from the image supports this conclusion?" }, { "from": "gpt", "value": "The answer is Banking services, including ATM access, are offered inside the building" } ], "clip_score": 20.8125, "loss": 2.9739456176757812 }, { "id": "identity_249820", "image": "TextVQA/images/f3454f7a0d6b2e11.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different sponsors are displayed on the banners, and what are their names?" }, { "from": "gpt", "value": "The answer is Three sponsors: Coors, Carling, and one unspecified" } ], "clip_score": 18.109375, "loss": 3.3964157104492188 }, { "id": "identity_163014", "image": "Geometry3K/images/1417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle opposite to angle 1 in the triangle formed inside the circle?" }, { "from": "gpt", "value": "The answer is 42 degrees" } ], "clip_score": 33.3125, "loss": 2.48740553855896 }, { "id": "identity_323771", "image": "A-OKVQA/images/68231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these planes called?\nChoices:\n(A) airbus\n(B) jet\n(C) commuter\n(D) seaplane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.28125, "loss": 3.6675686836242676 }, { "id": "identity_268028", "image": "GeoQA+/images/5504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, there is a square DEFG with D on AC, E and F on AB, and line AG intersecting DE and BC at points M and N respectively. If angle B is 90 degrees, AB is 4, BC is 3, EF is 1, what is the length of BN?\nChoices:\n(A) \\frac{4}{3}\n(B) \\frac{3}{2}\n(C) \\frac{8}{5}\n(D) \\frac{12}{7}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.125, "loss": 2.3171756267547607 }, { "id": "identity_259385", "image": "ChartQA/images/7903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined total of U.S. attorneys general with experience in the Military, Judge, and Justice Department categories?" }, { "from": "gpt", "value": "The answer is 66" } ], "clip_score": 33.59375, "loss": 2.6981091499328613 }, { "id": "identity_205962", "image": "A-OKVQA/images/301714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vegetable on the left side of the image?" }, { "from": "gpt", "value": "The answer is White and purple" } ], "clip_score": 26.984375, "loss": 3.3643009662628174 }, { "id": "identity_99651", "image": "IconQA/images/22236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of circles in the image?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 27.5625, "loss": 2.0288217067718506 }, { "id": "identity_90900", "image": "Geometry3K/images/1404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the inscribed quadrilateral ABCD, if AB is parallel to DC, what is the measure of angle ADC?" }, { "from": "gpt", "value": "The answer is 86 degrees" } ], "clip_score": 32.96875, "loss": 2.70871639251709 }, { "id": "identity_208740", "image": "VQA2.0/images/476515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the building directly behind the clock tower?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 29.28125, "loss": 3.7105631828308105 }, { "id": "identity_76350", "image": "ScienceQA/images/2730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Name the state that is to the south of the highlighted state" }, { "from": "gpt", "value": "The answer is Arkansas" } ], "clip_score": 32.34375, "loss": 2.744762420654297 }, { "id": "identity_90627", "image": "UniGeo/images/3468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O, CD is a chord, and angle ABD is 58 degrees, what is the measure of angle AOD, where O is the center of the circle?" }, { "from": "gpt", "value": "The answer is 64 degrees" } ], "clip_score": 35.15625, "loss": 2.111098289489746 }, { "id": "identity_162876", "image": "Geometry3K/images/2025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle FEC in the circle?" }, { "from": "gpt", "value": "The answer is 45°" } ], "clip_score": 38.03125, "loss": 1.5493085384368896 }, { "id": "identity_202609", "image": "A-OKVQA/images/255795.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number on the advertisement?" }, { "from": "gpt", "value": "The answer is 1-877-FOGS-OUT" } ], "clip_score": 17.109375, "loss": 1.9880497455596924 }, { "id": "identity_274476", "image": "VQA-RAD/images/synpic49862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the lesion?" }, { "from": "gpt", "value": "The answer is Left Parietal lobe" } ], "clip_score": 28.375, "loss": 3.149138927459717 }, { "id": "identity_193869", "image": "VQA2.0/images/431228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the rug located at the entrance of the room?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.21875, "loss": 3.098872661590576 }, { "id": "identity_109016", "image": "VQA-AS/images/19553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the book on the table?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.8125, "loss": 2.858337163925171 }, { "id": "identity_92065", "image": "TabMWP/images/23875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of donations received per month?" }, { "from": "gpt", "value": "The answer is 450.75" } ], "clip_score": 33.34375, "loss": 2.2769410610198975 }, { "id": "identity_105283", "image": "A-OKVQA/images/31135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main protein ingredient in the sandwich?" }, { "from": "gpt", "value": "The answer is Chicken" } ], "clip_score": 23.015625, "loss": 3.0634968280792236 }, { "id": "identity_157844", "image": "TQA/images/abc_question_images/earth_moon_phases_12536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter corresponds to the full moon phase?" }, { "from": "gpt", "value": "The answer is W" } ], "clip_score": 31.828125, "loss": 2.9372081756591797 }, { "id": "identity_238670", "image": "FigureQA/images/73396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest average y-axis value?" }, { "from": "gpt", "value": "The answer is Sky Blue" } ], "clip_score": 25.875, "loss": 2.677657127380371 }, { "id": "identity_59507", "image": "FigureQA/images/77199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of Dark Green at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 29.609375, "loss": 2.7584118843078613 }, { "id": "identity_212694", "image": "PMC-VQA/images/PMC7728654_Fig2_318208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this image show signs of a stroke?" }, { "from": "gpt", "value": "The answer is No, there are no signs of a stroke" } ], "clip_score": 30.0, "loss": 1.9637001752853394 }, { "id": "identity_1634", "image": "PMC-VQA/images/PMC6842499_Fig2_65273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 2. Which vertebra was the focus of the surgery?\nChoices:\n(A) C5. \n(B) C6. \n(C) C7. \n(D) C8. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.015625, "loss": 4.12258768081665 }, { "id": "identity_296368", "image": "DocVQA/images/mjvg0227_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the state is referenced on the page?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 16.9375, "loss": 2.872220993041992 }, { "id": "identity_184362", "image": "VQA2.0/images/103352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the toilet seat cover?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.28125, "loss": 2.8935647010803223 }, { "id": "identity_225714", "image": "PMC-VQA/images/PMC8070318_cancers-13-01806-f001_426440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the skeletal muscle index (SMI) for both images and determine the difference between them. Assume the skeletal muscle area (SMA) for the upper image is 120 cm² and for the lower image is 150 cm². The height of the individual for both images is 1.75 meters" }, { "from": "gpt", "value": "The answer is 9.8 cm²/m²" } ], "clip_score": 19.21875, "loss": 2.278794288635254 }, { "id": "identity_29176", "image": "VQA2.0/images/107353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What animal is here?" }, { "from": "gpt", "value": "The answer is buffalo" } ], "clip_score": 16.484375, "loss": 3.198706865310669 }, { "id": "identity_51981", "image": "DVQA/images/bar_train_00131252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the object 'marine' have an equal number of preferences in both categories?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.328125, "loss": 2.772709846496582 }, { "id": "identity_240723", "image": "VQA2.0/images/18866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of wheels visible on the scooters?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 22.875, "loss": 1.9472154378890991 }, { "id": "identity_221274", "image": "A-OKVQA/images/199437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the blackboard to the right?" }, { "from": "gpt", "value": "The answer is Gourmet Sandwiches" } ], "clip_score": 21.921875, "loss": 2.8048720359802246 }, { "id": "identity_44870", "image": "FigureQA/images/79142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are represented by Slate?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.171875, "loss": 2.174252986907959 }, { "id": "identity_252023", "image": "A-OKVQA/images/37932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two items being carried by the woman and the dog, and what is the color of the item in the dog's mouth?" }, { "from": "gpt", "value": "The answer is Bouquet of flowers, umbrella, orange" } ], "clip_score": 28.25, "loss": 3.545574188232422 }, { "id": "identity_258284", "image": "A-OKVQA/images/210052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the benefits of the rider wearing gloves for grip while riding a motorcycle?" }, { "from": "gpt", "value": "The answer is Safety, comfort, protection, temperature regulation, and improved performance" } ], "clip_score": 29.96875, "loss": 2.94732666015625 }, { "id": "identity_35481", "image": "PMC-VQA/images/PMC6386756_Fig7_439553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which image modality was used to obtain the image? \nChoices:\n(A) T1WI \n(B) T2WI \n(C) DWI \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 4.1075263023376465 }, { "id": "identity_37870", "image": "VQA2.0/images/274514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color coat is the man wearing on the horse?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 27.0, "loss": 3.481086015701294 }, { "id": "identity_253141", "image": "GeoQA+/images/4677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AD∥BE∥CF,直线l1,l2与这三条平行线分别交于点A,B,C和点D,E,F,\\frac{AB}{BC}=\\frac{2}{3},DE=4,若AB=2x,BC=3x,求EF的长。" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.125, "loss": 2.7902681827545166 }, { "id": "identity_205054", "image": "PMC-VQA/images/PMC5379728_Fig1_149081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological sign present in the image that indicates a pathology?" }, { "from": "gpt", "value": "The answer is Enlarged, irregularly contoured left kidney" } ], "clip_score": 30.96875, "loss": 3.4274542331695557 }, { "id": "identity_89830", "image": "Geometry3K/images/1483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle RST in the rhombus RSTU?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 36.375, "loss": 1.799149751663208 }, { "id": "identity_43064", "image": "MapQA/images/map_3750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value greater than 5600?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 22.125, "loss": 2.183659315109253 }, { "id": "identity_77564", "image": "AI2D/images/1919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is a secondary consumer in this food web?" }, { "from": "gpt", "value": "The answer is The fox" } ], "clip_score": 36.46875, "loss": 3.3107106685638428 }, { "id": "identity_262663", "image": "DocVQA/images/rrcw0217_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the forum mentioned in the document header?" }, { "from": "gpt", "value": "The answer is ERT/HRT Expert Forums" } ], "clip_score": 22.328125, "loss": 3.8887972831726074 }, { "id": "identity_67509", "image": "ScienceQA/images/18727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly west of Virginia?" }, { "from": "gpt", "value": "The answer is West Virginia" } ], "clip_score": 26.40625, "loss": 2.2835044860839844 }, { "id": "identity_136192", "image": "DVQA/images/bar_train_00048187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of \"chair\" at \"plot\"?" }, { "from": "gpt", "value": "The answer is 10^8" } ], "clip_score": 17.859375, "loss": 3.213925361633301 }, { "id": "identity_73048", "image": "AI2D/images/946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bird's beak is adapted for probing into the ground for insects?" }, { "from": "gpt", "value": "The answer is Bird at label D" } ], "clip_score": 31.890625, "loss": 4.399325370788574 }, { "id": "identity_118048", "image": "TextVQA/images/7608bdc4a73f4ca8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the pinball machine?" }, { "from": "gpt", "value": "The answer is Bally" } ], "clip_score": 31.515625, "loss": 3.514904737472534 }, { "id": "identity_332438", "image": "IconQA/images/28996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many items are there?\nChoices:\n(A) 5\n(B) 2\n(C) 1\n(D) 3\n(E) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.671875, "loss": 3.3338475227355957 }, { "id": "identity_323787", "image": "TabMWP/images/33387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A person compared the ages of some relatives. What is the range of the numbers?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 23.25, "loss": 3.158512830734253 }, { "id": "identity_266086", "image": "Geometry3K/images/2070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the value of angle XT?\nChoices:\n(A) 3.5\n(B) 7\n(C) 14\n(D) 28" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.890625, "loss": 3.7890312671661377 }, { "id": "identity_250092", "image": "FigureQA/images/50789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the values represented by the bars?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 23.28125, "loss": 2.517835855484009 }, { "id": "identity_251364", "image": "IconQA/images/6227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Kevin is staying inside this afternoon because it is raining. His watch shows the time. If Kevin started his indoor activities at 11:30 A.M. and has been inside for 1 hour and 30 minutes, what time will it be when he finishes his activities?" }, { "from": "gpt", "value": "The answer is 1:00 P.M" } ], "clip_score": 27.296875, "loss": 1.316590428352356 }, { "id": "identity_103911", "image": "VizWiz/images/VizWiz_train_00009179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the coffee pod shown in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 26.3125, "loss": 3.0803236961364746 }, { "id": "identity_338552", "image": "UniGeo/images/1694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知CD是⊙O的直径,过点D的弦DE平行于某条半径,若∠D的度数是50°,则∠C的度数是()\n\nChoices:\n(A) 25°\n(B) 30°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.96875, "loss": 2.518392324447632 }, { "id": "identity_96868", "image": "TabMWP/images/34710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of homes sold in 2005 across all cities?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 35.0625, "loss": 2.113589286804199 }, { "id": "identity_178239", "image": "TabMWP/images/23593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did Lucy have the highest number of undeleted voicemail messages?" }, { "from": "gpt", "value": "The answer is Wednesday" } ], "clip_score": 41.0, "loss": 1.9699714183807373 }, { "id": "identity_305634", "image": "DVQA/images/bar_train_00144466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the bottom in each set?" }, { "from": "gpt", "value": "The answer is pay" } ], "clip_score": 22.1875, "loss": 3.5605673789978027 }, { "id": "identity_328858", "image": "PMC-VQA/images/PMC9414956_microorganisms-10-01640-f003_385876.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bone is affected by the disease shown here?\nChoices:\n(A) Proximal tibia.\n(B) Distal tibia.\n(C) Proximal fibula.\n(D) Distal fibula." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 4.420681953430176 }, { "id": "identity_135899", "image": "DVQA/images/bar_train_00125050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median accuracy value for the algorithm 'spot'?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 34.5, "loss": 2.8213202953338623 }, { "id": "identity_239826", "image": "UniGeo/images/2844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的弦,点C在圆上,已知∠OBA=40°,若∠AOC=2x,则x的值是多少?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 32.4375, "loss": 2.7430834770202637 }, { "id": "identity_111389", "image": "TextVQA/images/8d03ac99846d80d1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many LED diagnostic lights does the computer have?" }, { "from": "gpt", "value": "The answer is 8 LED diagnostic lights" } ], "clip_score": 20.328125, "loss": 1.876390814781189 }, { "id": "identity_145961", "image": "DocVQA/images/mhcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which university is affiliated with the Medical Care Group mentioned in the document?" }, { "from": "gpt", "value": "The answer is Washington University School of Medicine" } ], "clip_score": 24.9375, "loss": 1.5343767404556274 }, { "id": "identity_185832", "image": "A-OKVQA/images/106635.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What trick is the skateboarder performing?" }, { "from": "gpt", "value": "The answer is Boardslide" } ], "clip_score": 29.453125, "loss": 2.980081796646118 }, { "id": "identity_268667", "image": "UniGeo/images/2810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB and CD are diameters of circle O, with chord CE parallel to AB and measuring 100°. What is the degree measure of angle AOC?\nChoices:\n(A) 30°\n(B) 39°\n(C) 40°\n(D) 45°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.84375, "loss": 2.536076784133911 }, { "id": "identity_221921", "image": "A-OKVQA/images/477343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ski lifts are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.09375, "loss": 4.663687705993652 }, { "id": "identity_177043", "image": "TabMWP/images/7210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money donated to all causes in April?" }, { "from": "gpt", "value": "The answer is $49" } ], "clip_score": 34.34375, "loss": 2.5529305934906006 }, { "id": "identity_12252", "image": "VQA-AS/images/6216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are they playing with toys?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 22.78125, "loss": 3.5749869346618652 }, { "id": "identity_169144", "image": "TabMWP/images/1889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students scored between 30 to 50 points inclusive?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 27.09375, "loss": 2.368422746658325 }, { "id": "identity_71575", "image": "TQA/images/question_images/biomes_8016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the Chaparral biome on the map?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.046875, "loss": 2.946239471435547 }, { "id": "identity_288965", "image": "ScienceQA/images/18771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which trade option would allow Natalie and Mitch to exchange items to get what they want? \nChoices:\n(A) Mitch exchanging his almonds for Natalie's tomatoes.\n(B) Mitch exchanging his broccoli for Natalie's oranges.\n(C) Natalie exchanging her tomatoes for Mitch's broccoli.\n(D) Natalie exchanging her tomatoes for Mitch's carrots." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.75, "loss": 3.539249897003174 }, { "id": "identity_30571", "image": "A-OKVQA/images/136539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they in this enclosed case?\nChoices:\n(A) anti-theft\n(B) disinfect\n(C) defrost\n(D) keep cold" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.03125, "loss": 3.8158042430877686 }, { "id": "identity_54695", "image": "DocVQA/images/khxj0037_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scheduled week for the RL Evaporator Upgrade fabrication at Semiworks?" }, { "from": "gpt", "value": "The answer is the week of 6/18/90" } ], "clip_score": 25.453125, "loss": 1.812307357788086 }, { "id": "identity_10409", "image": "GeoQA+/images/3962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,弦CD⊥AB,P是⁀{CD}上任意一点(不与点C、D重合),若⁀{BD}的度数是50°,则∠CPA的度数是()\nChoices:\n(A) 65°\n(B) 50°\n(C) 130°\n(D) 100°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.234375, "loss": 2.721745252609253 }, { "id": "identity_218933", "image": "A-OKVQA/images/343322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located on the second floor of the building?" }, { "from": "gpt", "value": "The answer is Cantina" } ], "clip_score": 17.546875, "loss": 3.9984095096588135 }, { "id": "identity_307711", "image": "A-OKVQA/images/202372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many red umbrellas are hanging up in the ropes above the dining area?\nChoices:\n(A) five\n(B) six\n(C) four\n(D) three" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.90625, "loss": 3.325761318206787 }, { "id": "identity_161358", "image": "GeoQA+/images/361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is the diameter of circle O and angle BCD is 25 degrees, what is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 155 degrees" } ], "clip_score": 35.5625, "loss": 1.9270176887512207 }, { "id": "identity_161007", "image": "GeoQA+/images/5386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,∠AOB=50°,求弧AC的度数为多少?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 30.265625, "loss": 1.9656362533569336 }, { "id": "identity_174816", "image": "IconQA/images/64928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more pails are there than shovels?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.046875, "loss": 2.11539626121521 }, { "id": "identity_73145", "image": "AI2D/images/801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animals are tertiary consumers in this food chain?" }, { "from": "gpt", "value": "The answer is Mountain Lion and Hawk" } ], "clip_score": 31.28125, "loss": 2.19156551361084 }, { "id": "identity_200041", "image": "VQA2.0/images/530002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a printer on the desk?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.640625, "loss": 3.3959708213806152 }, { "id": "identity_103882", "image": "TextVQA/images/04e14a8f5dc496b8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum height the geyser can reach during an eruption?" }, { "from": "gpt", "value": "The answer is 184 ft" } ], "clip_score": 20.5, "loss": 2.21012020111084 }, { "id": "identity_115602", "image": "TextVQA/images/ee72c08a127be37f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the machine standing on a wooden surface?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 16.328125, "loss": 3.0727591514587402 }, { "id": "identity_111256", "image": "VQA2.0/images/13121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tiers does the cake have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 26.875, "loss": 4.179832935333252 }, { "id": "identity_205611", "image": "Super-CLEVR/images/superCLEVR_new_001143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vehicle in the image that is the same color as the airplane?" }, { "from": "gpt", "value": "The answer is No, there is no vehicle the same color as the airplane" } ], "clip_score": 23.0, "loss": 1.7626266479492188 }, { "id": "identity_316208", "image": "GeoQA+/images/2580.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在ABCD中,AB=3,BC=4,则这个图形的周长是()\n选项:\n(A) 6\n(B) 8\n(C) 14\n(D) 16" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5625, "loss": 2.4051222801208496 }, { "id": "identity_286927", "image": "PMC-VQA/images/PMC5455666_materials-08-04287-f005_173123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is the fractured surface depicted in SEM images made of?\nChoices:\n(A) Copper \n(B) Aluminum \n(C) Titanium \n(D) Iron" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.65625, "loss": 3.8973915576934814 }, { "id": "identity_236105", "image": "FigureQA/images/20759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the pie chart occupied by each color, and which color occupies the largest percentage?" }, { "from": "gpt", "value": "The answer is Crimson" } ], "clip_score": 39.84375, "loss": 2.719306230545044 }, { "id": "identity_80316", "image": "UniGeo/images/1766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ADG in the given diagram?" }, { "from": "gpt", "value": "The answer is 130 degrees" } ], "clip_score": 33.125, "loss": 2.186753273010254 }, { "id": "identity_121857", "image": "TextVQA/images/14d8b72e83d7554b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the phone in the image turned on or off?" }, { "from": "gpt", "value": "The answer is Turned on" } ], "clip_score": 15.6171875, "loss": 2.5321972370147705 }, { "id": "identity_193038", "image": "VQA2.0/images/527364.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many train tracks are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.234375, "loss": 2.475492477416992 }, { "id": "identity_206010", "image": "VQA-AS/images/12834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is depicted on the picnic blanket?" }, { "from": "gpt", "value": "The answer is Red and white checkered" } ], "clip_score": 29.59375, "loss": 2.287073850631714 }, { "id": "identity_124805", "image": "VQA-AS/images/14212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clouds are in the sky?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.75, "loss": 2.4183242321014404 }, { "id": "identity_308677", "image": "ScienceQA/images/18041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property matches this item?\nChoices:\n(A) smooth\n(B) sticky" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.203125, "loss": 4.562605381011963 }, { "id": "identity_319439", "image": "A-OKVQA/images/559288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a sign on top of the truck?\nChoices:\n(A) original part\n(B) vandalism\n(C) keep score\n(D) advertising" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.46875, "loss": 3.8884665966033936 }, { "id": "identity_102580", "image": "TabMWP/images/12512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Jen decides to buy 3/4 pound of large binder clips, how much change will she receive from a $10 bill?" }, { "from": "gpt", "value": "The answer is $6.25" } ], "clip_score": 34.03125, "loss": 1.7961984872817993 }, { "id": "identity_269092", "image": "AI2D/images/3373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the anther contain among the following choices: pollen grains, sepal, ovaries, or stigma?\nChoices:\n(A) pollen grains\n(B) sepal\n(C) ovaries\n(D) stigma" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.703125, "loss": 4.110356330871582 }, { "id": "identity_1302", "image": "GeoQA+/images/888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在ABCD中,∠B+∠D=200°,则∠A的度数为()\nChoices:\n(A) 95°\n(B) 90°\n(C) 85°\n(D) 80°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.3125, "loss": 2.6252715587615967 }, { "id": "identity_209343", "image": "PMC-VQA/images/PMC6662306_pbi13074-fig-0003_15391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections of the sample are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.859375, "loss": 2.5191051959991455 }, { "id": "identity_63910", "image": "AI2D/images/2598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the force exerted by the Moon on the Earth?" }, { "from": "gpt", "value": "The answer is Rightward" } ], "clip_score": 32.125, "loss": 3.551265001296997 }, { "id": "identity_9608", "image": "UniGeo/images/2310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 已知如图,AD:BD=1:2,DE∥BC,若DE=2时,BC的值是()\nChoices:\n(A) 1\n(B) 3\n(C) 4\n(D) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.1875, "loss": 2.53279185295105 }, { "id": "identity_231658", "image": "TQA/images/abc_question_images/nuclear_energy_18109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of the labels in the diagram represents an element with two protons and two neutrons, and what is the total number of protons and neutrons in the nucleus of this element?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.109375, "loss": 4.014157295227051 }, { "id": "identity_193396", "image": "PMC-VQA/images/PMC8201921_Fig2_467736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the bone that is at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Occipital bone" } ], "clip_score": 30.359375, "loss": 2.6072261333465576 }, { "id": "identity_230823", "image": "FigureQA/images/22661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Bubblegum and Periwinkle?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 27.515625, "loss": 1.7629876136779785 }, { "id": "identity_13216", "image": "A-OKVQA/images/553130.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When making a slow descent into water what is the safest way here?\nChoices:\n(A) rolling headfirst\n(B) dive headfirst\n(C) belly flop\n(D) ladder" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.609375, "loss": 3.7528815269470215 }, { "id": "identity_32291", "image": "PMC-VQA/images/PMC5519540_Fig3_193343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dotted lines indicate in the images? \nChoices:\n(A) Blood vessels. \n(B) Outline of the lungs. \n(C) Outline of the heart. \n(D) Outline of the digestive system. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.390625, "loss": 4.188271999359131 }, { "id": "identity_266278", "image": "ChartQA/images/18051975000655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the provided graph, which two countries are being compared?" }, { "from": "gpt", "value": "The answer is [Israel, Slovenia]" } ], "clip_score": 14.1171875, "loss": 2.5336453914642334 }, { "id": "identity_304687", "image": "A-OKVQA/images/330098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the white thing folded in the corner?\nChoices:\n(A) table\n(B) bed sheet\n(C) poster\n(D) box" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.203125, "loss": 4.378269195556641 }, { "id": "identity_278020", "image": "UniGeo/images/1258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, BM is tangent to circle O at point B. If ∠MBA = 140°, what is the degree measure of ∠ACB?\nChoices:\n(A) 40°\n(B) 50°\n(C) 60°\n(D) 70°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.71875, "loss": 2.2331573963165283 }, { "id": "identity_165296", "image": "UniGeo/images/2847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the circle has a radius of 5 units, what is the area of sector BOC?" }, { "from": "gpt", "value": "The answer is (110/360)*π*25 square units" } ], "clip_score": 32.84375, "loss": 2.4463658332824707 }, { "id": "identity_134622", "image": "FigureQA/images/64583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Light Gold line intersect with the Dark Gold line?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.734375, "loss": 2.243459463119507 }, { "id": "identity_266319", "image": "IconQA/images/53548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the nail in inches when measured to the nearest inch using the ruler?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.65625, "loss": 2.7814033031463623 }, { "id": "identity_72648", "image": "ScienceQA/images/14092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sedimentary rock is shown in the image?" }, { "from": "gpt", "value": "The answer is Sandstone or Conglomerate" } ], "clip_score": 29.609375, "loss": 2.2909133434295654 }, { "id": "identity_71466", "image": "AI2D/images/3762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the Diaphragm in the respiratory system?" }, { "from": "gpt", "value": "The answer is It helps in breathing" } ], "clip_score": 36.5, "loss": 2.8450140953063965 }, { "id": "identity_28351", "image": "VQA-AS/images/5241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How did the lady get to the park?" }, { "from": "gpt", "value": "The answer is moped" } ], "clip_score": 26.25, "loss": 2.670640230178833 }, { "id": "identity_251518", "image": "VQA2.0/images/305259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the street name and the visible shops, identify the specific neighborhood in London where this picture was taken" }, { "from": "gpt", "value": "The answer is Chinatown" } ], "clip_score": 32.53125, "loss": 1.7385780811309814 }, { "id": "identity_270807", "image": "ChartQA/images/4605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the rightmost value on a graph is 19, what color is the graph?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 14.53125, "loss": 3.242359161376953 }, { "id": "identity_117108", "image": "PMC-VQA/images/PMC5884212_fig5_297237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible abnormalities in the brain images?" }, { "from": "gpt", "value": "The answer is No visible abnormalities" } ], "clip_score": 34.4375, "loss": 2.787536144256592 }, { "id": "identity_224597", "image": "IconQA/images/60208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line is divided into 5 equal segments, what is the length of each segment in centimeters?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.390625, "loss": 2.5495309829711914 }, { "id": "identity_251465", "image": "PMC-VQA/images/PMC6352060_F1_428944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the given MRI type, what are the key characteristics and clinical applications of a T2-weighted MRI?" }, { "from": "gpt", "value": "The answer is Bright fluid, long TR/TE, detects edema, inflammation, lesions, tumors" } ], "clip_score": 34.4375, "loss": 3.1200482845306396 }, { "id": "identity_40929", "image": "DocVQA/images/lznw0023_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the clause number that states there are no unembodied statements or warranties?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 23.5, "loss": 2.41648530960083 }, { "id": "identity_11037", "image": "TabMWP/images/25888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Sophie counted the number of ducks at each pond in the city. What is the largest number of ducks?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 27.109375, "loss": 2.5404837131500244 }, { "id": "identity_237189", "image": "Geometry3K/images/917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the total length of the line segment connecting the centers of circles \\(\\odot A\\), \\(\\odot B\\), and \\(\\odot C\\)" }, { "from": "gpt", "value": "The answer is 27.5 inches" } ], "clip_score": 34.125, "loss": 2.303760528564453 }, { "id": "identity_192998", "image": "A-OKVQA/images/107247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bus's advertisement?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 27.125, "loss": 4.445043563842773 }, { "id": "identity_86075", "image": "GeoQA+/images/677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the diagonals of parallelogram ABCD intersect at point E and angle AEB measures 70 degrees, what is the measure of angle DEC?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 31.71875, "loss": 1.5008673667907715 }, { "id": "identity_2300", "image": "DocVQA/images/xrbd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the staff, White house conference on Food, Nutrition and Health?" }, { "from": "gpt", "value": "The answer is Judith A. Cooke (Mrs.)" } ], "clip_score": 23.984375, "loss": 2.810150384902954 }, { "id": "identity_119340", "image": "A-OKVQA/images/96712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is in the background?" }, { "from": "gpt", "value": "The answer is A gazebo" } ], "clip_score": 21.09375, "loss": 2.8325023651123047 }, { "id": "identity_121531", "image": "A-OKVQA/images/151095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light for the vehicles going straight?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.640625, "loss": 2.90122127532959 }, { "id": "identity_169571", "image": "TabMWP/images/9510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a person buys one of each item, how much money will they spend in total?" }, { "from": "gpt", "value": "The answer is $354" } ], "clip_score": 23.296875, "loss": 2.403514862060547 }, { "id": "identity_81230", "image": "GeoQA+/images/1290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of arc APB if the radius of the circle is 10 units?" }, { "from": "gpt", "value": "The answer is 5π/3 units" } ], "clip_score": 31.515625, "loss": 1.8492963314056396 }, { "id": "identity_243383", "image": "TabMWP/images/17134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine if the relation given by the table is a one-to-one function. If it is, explain why. If it is not, provide an example of why it is not" }, { "from": "gpt", "value": "The answer is Yes, it is a one-to-one function" } ], "clip_score": 24.71875, "loss": 1.6262726783752441 }, { "id": "identity_69835", "image": "TQA/images/question_images/waves_7680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the lowest point of the wave?" }, { "from": "gpt", "value": "The answer is Trough" } ], "clip_score": 34.46875, "loss": 2.9024813175201416 }, { "id": "identity_144989", "image": "DVQA/images/bar_train_00040395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold per item?" }, { "from": "gpt", "value": "The answer is 5.83" } ], "clip_score": 23.609375, "loss": 2.1489462852478027 }, { "id": "identity_186552", "image": "TextVQA/images/003d25460307a847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many figurines of the same character are there on the shelf?" }, { "from": "gpt", "value": "The answer is Ten" } ], "clip_score": 25.34375, "loss": 4.130467414855957 }, { "id": "identity_97343", "image": "TabMWP/images/15943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of participants in the races?" }, { "from": "gpt", "value": "The answer is 66" } ], "clip_score": 24.390625, "loss": 2.850994110107422 }, { "id": "identity_117712", "image": "TextVQA/images/63b7d07254a2cbfc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phrase is written on the circular emblem on the side of the float?" }, { "from": "gpt", "value": "The answer is Canines with Courage" } ], "clip_score": 24.5, "loss": 2.090243339538574 }, { "id": "identity_211857", "image": "TextVQA/images/294ddfe19f5063a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two main colors of the crowd's attire?" }, { "from": "gpt", "value": "The answer is Orange and white" } ], "clip_score": 22.171875, "loss": 3.1326677799224854 }, { "id": "identity_62837", "image": "AI2D/images/4867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the geological feature labeled as \"Trench\"?" }, { "from": "gpt", "value": "The answer is Oceanic Trench" } ], "clip_score": 30.640625, "loss": 2.204042911529541 }, { "id": "identity_133465", "image": "A-OKVQA/images/355342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrellas' fabric?" }, { "from": "gpt", "value": "The answer is Transparent" } ], "clip_score": 20.578125, "loss": 4.628814220428467 }, { "id": "identity_126403", "image": "VQA2.0/images/459563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of habitat is the bird in?" }, { "from": "gpt", "value": "The answer is Forest" } ], "clip_score": 24.15625, "loss": 2.9601516723632812 }, { "id": "identity_194859", "image": "VizWiz/images/VizWiz_train_00008703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Afternoon" } ], "clip_score": 20.09375, "loss": 2.7168147563934326 }, { "id": "identity_286877", "image": "TabMWP/images/1716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Harold missed the 12:15 P.M. bus at the science museum. What is the departure time of the next bus according to the schedule provided?\nChoices:\n(A) 12:15 P.M.\n(B) 10:30 A.M.\n(C) 11:30 A.M.\n(D) 12:30 P.M." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.65625, "loss": 2.7804386615753174 }, { "id": "identity_216264", "image": "A-OKVQA/images/117222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the bananas?" }, { "from": "gpt", "value": "The answer is The predominant color of the bananas" } ], "clip_score": 25.015625, "loss": 2.197852849960327 }, { "id": "identity_55733", "image": "FigureQA/images/56812.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is The title is \"title" } ], "clip_score": 34.46875, "loss": 4.303465366363525 }, { "id": "identity_155349", "image": "AI2D/images/1946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is at the top of this food web?" }, { "from": "gpt", "value": "The answer is Anaconda" } ], "clip_score": 34.375, "loss": 3.647991180419922 }, { "id": "identity_238495", "image": "FigureQA/images/66744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest values of the Aqua and Dark Salmon lines?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 27.921875, "loss": 2.4604055881500244 }, { "id": "identity_103189", "image": "CLEVR-Math/images/CLEVR_train_053048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are present in the objects?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.625, "loss": 1.8913551568984985 }, { "id": "identity_144244", "image": "DocVQA/images/kzbn0226_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the statutory design of TSCA as amended by LCSA, according to the text?" }, { "from": "gpt", "value": "The answer is To universally include \"all conditions of use" } ], "clip_score": 24.90625, "loss": 4.845870018005371 }, { "id": "identity_174015", "image": "IconQA/images/99324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who went to soccer practice first?" }, { "from": "gpt", "value": "The answer is Rob" } ], "clip_score": 17.671875, "loss": 2.984053373336792 }, { "id": "identity_287457", "image": "VQA-AS/images/11720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of paintings?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 16.453125, "loss": 2.1477184295654297 }, { "id": "identity_2441", "image": "IconQA/images/94730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Samuel is coming home from work in the evening. The clock in Samuel's car shows the time. What time is it?\nChoices:\n(A) 6:00 P.M.\n(B) 6:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 3.602426052093506 }, { "id": "identity_165832", "image": "Geometry3K/images/36.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of the three angles in the triangle formed inside the circle?" }, { "from": "gpt", "value": "The answer is 180° + x°" } ], "clip_score": 32.8125, "loss": 2.184001922607422 }, { "id": "identity_13563", "image": "Super-CLEVR/images/superCLEVR_new_000522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a red car that is the same size as the gray metallic object; what material is it?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 29.59375, "loss": 3.4760689735412598 }, { "id": "identity_224574", "image": "ScienceQA/images/3250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state highlighted in the image, and which major river flows through this capital city?" }, { "from": "gpt", "value": "The answer is Des Moines, Des Moines River" } ], "clip_score": 26.578125, "loss": 2.111898183822632 }, { "id": "identity_308812", "image": "ScienceQA/images/18726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony depicted?\nChoices:\n(A) South Carolina\n(B) North Carolina\n(C) Connecticut\n(D) Georgia" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.765625, "loss": 4.028820991516113 }, { "id": "identity_315876", "image": "DVQA/images/bar_train_00196131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units of this item were sold across all the stores?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 24.734375, "loss": 2.4001705646514893 }, { "id": "identity_334425", "image": "ScienceQA/images/9850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) blue\n(B) sour\n(C) salty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.046875, "loss": 4.223830699920654 }, { "id": "identity_219002", "image": "PMC-VQA/images/PMC7781868_FIG7_334373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the red color in this image?" }, { "from": "gpt", "value": "The answer is Blood flow towards the probe" } ], "clip_score": 27.4375, "loss": 4.739016532897949 }, { "id": "identity_35837", "image": "CLEVR-Math/images/CLEVR_train_008853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all brown cylinders. How many purple blocks are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.390625, "loss": 2.3580100536346436 }, { "id": "identity_187132", "image": "PMC-VQA/images/PMC6909617_Fig1_83161.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the abbreviation FFC stand for in the context of this image?" }, { "from": "gpt", "value": "The answer is FFC stands for Femoral Frontal Condylar" } ], "clip_score": 36.0, "loss": 2.2609779834747314 }, { "id": "identity_315000", "image": "PMC-VQA/images/PMC6342526_fig2s1_425930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this depict?\nChoices:\n(A) 3DAS old seedlings \n(B) 4DAS old seedlings \n(C) 5DAS old seedlings \n(D) 6DAS old seedlings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.28125, "loss": 3.4515233039855957 }, { "id": "identity_209929", "image": "A-OKVQA/images/59015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the individual standing in the barge wearing?" }, { "from": "gpt", "value": "The answer is Sneakers" } ], "clip_score": 25.125, "loss": 2.923022508621216 }, { "id": "identity_24634", "image": "TabMWP/images/8582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Sofia is at Oyster Lighthouse. If she wants to arrive at Kite Coast at 12.30 P.M., what time should she get on the boat?\nChoices:\n(A) 9:30 A.M.\n(B) 10:45 A.M.\n(C) 12:30 P.M.\n(D) 10:00 A.M." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.53125, "loss": 3.3708558082580566 }, { "id": "identity_32969", "image": "ChartQA/images/1831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the percentage of people who are at least somewhat confident that Mike Pence did a good job?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 24.453125, "loss": 2.769092082977295 }, { "id": "identity_86831", "image": "GeoQA+/images/1343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠ADC is 35° and OA is perpendicular to BC, what is the measure of ∠OAC?" }, { "from": "gpt", "value": "The answer is 55°" } ], "clip_score": 31.375, "loss": 1.8835701942443848 }, { "id": "identity_193922", "image": "PMC-VQA/images/PMC5015237_Fig2_71228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the circle marking the abnormality?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.359375, "loss": 4.143915176391602 }, { "id": "identity_123679", "image": "PMC-VQA/images/PMC5250663_Fig1_116763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.375, "loss": 2.2718470096588135 }, { "id": "identity_148508", "image": "DocVQA/images/yxph0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the copies of the report addressed to?" }, { "from": "gpt", "value": "The answer is General Chemist, District Engineer, Dist. Supt., Superintendent, Laboratory" } ], "clip_score": 25.03125, "loss": 3.2006022930145264 }, { "id": "identity_74543", "image": "AI2D/images/4380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bird has talons that are adapted for a diet consisting mainly of carrion?" }, { "from": "gpt", "value": "The answer is New World vulture (Cathartes)" } ], "clip_score": 33.0, "loss": 2.107173204421997 }, { "id": "identity_37776", "image": "PMC-VQA/images/PMC5991000_Fig2_325329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to visualize the osteolytic lesion? \nChoices:\n(A) Magnetic Resonance Imaging (MRI) \n(B) Computed Tomography (CT) \n(C) X-rays \n(D) Positron Emission Tomography (PET) " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.78125, "loss": 3.9350829124450684 }, { "id": "identity_95293", "image": "TabMWP/images/34663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more guests did the hotel have on Monday compared to Sunday?" }, { "from": "gpt", "value": "The answer is 27" } ], "clip_score": 36.9375, "loss": 2.678758382797241 }, { "id": "identity_33343", "image": "VQA2.0/images/528642.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is she sleeping?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.359375, "loss": 3.717984676361084 }, { "id": "identity_216374", "image": "TextVQA/images/a32ffa9bf5aa2aa5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any figures wearing a crown in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 9.953125, "loss": 3.4558448791503906 }, { "id": "identity_212958", "image": "TextVQA/images/00f9078d169011a1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the batter wearing?" }, { "from": "gpt", "value": "The answer is Cleats" } ], "clip_score": 25.59375, "loss": 3.096822738647461 }, { "id": "identity_303321", "image": "Super-CLEVR/images/superCLEVR_new_001206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the red bus made of the same material as the purple cruiser in front of the green metallic tandem bike?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 25.203125, "loss": 5.021944999694824 }, { "id": "identity_198942", "image": "PMC-VQA/images/PMC7254724_Fig3_179943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the blue spots within the sphere?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 21.96875, "loss": 3.6657400131225586 }, { "id": "identity_142846", "image": "FigureQA/images/29827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the x-axis?" }, { "from": "gpt", "value": "The answer is xaxis label" } ], "clip_score": 27.84375, "loss": 2.822727680206299 }, { "id": "identity_104659", "image": "PMC-VQA/images/PMC8020193_FIG1_409560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What medical device is visible on the patient's body in the image?" }, { "from": "gpt", "value": "The answer is A catheter or tube" } ], "clip_score": 32.09375, "loss": 2.8170242309570312 }, { "id": "identity_126565", "image": "VQA2.0/images/43001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any motorcycles in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is one motorcycle" } ], "clip_score": 16.109375, "loss": 2.653862953186035 }, { "id": "identity_337448", "image": "DocVQA/images/pkyn0081_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the test name mentioned in this report?" }, { "from": "gpt", "value": "The answer is burley smoke comp. study" } ], "clip_score": 25.546875, "loss": 4.942835807800293 }, { "id": "identity_36950", "image": "VQA2.0/images/558508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the board man in front of the store wearing on his head?" }, { "from": "gpt", "value": "The answer is chefs hat" } ], "clip_score": 20.734375, "loss": 2.95886492729187 }, { "id": "identity_193707", "image": "PMC-VQA/images/PMC4331820_F3_359517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of kidney stones in the image?" }, { "from": "gpt", "value": "The answer is No evidence of kidney stones" } ], "clip_score": 28.171875, "loss": 2.9665350914001465 }, { "id": "identity_95558", "image": "TabMWP/images/11514.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of one remote control car and two toy guitars?" }, { "from": "gpt", "value": "The answer is $73.00" } ], "clip_score": 28.40625, "loss": 1.6699697971343994 }, { "id": "identity_100927", "image": "TabMWP/images/5092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more pizzas were delivered on the busiest day compared to the slowest day?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 37.21875, "loss": 2.441983222961426 }, { "id": "identity_337020", "image": "TQA/images/question_images/blastocyst_9034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Considering a Blastocyst, which part will develop into a new human being?\nChoices:\n(A) Embryoblast\n(B) Trophoblast\n(C) Blastocyst Cavity\n(D) Zygote" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.65625, "loss": 5.081469535827637 }, { "id": "identity_149362", "image": "DocVQA/images/tsmk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the newsletter editor mentioned in the memo?" }, { "from": "gpt", "value": "The answer is Al Parker" } ], "clip_score": 23.671875, "loss": 5.17814826965332 }, { "id": "identity_71215", "image": "ScienceQA/images/15773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Idaho?" }, { "from": "gpt", "value": "The answer is Nevada" } ], "clip_score": 31.359375, "loss": 2.421557664871216 }, { "id": "identity_4930", "image": "UniGeo/images/1185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,AD⊥BC,BE⊥AC,AD,BE相交于点M,若AC=8,BM=4,则⊙O的半径等于()\nChoices:\n(A) 2√{5}\n(B) 2√{3}\n(C) 4√{2}\n(D) 6" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.265625, "loss": 2.483736515045166 }, { "id": "identity_184292", "image": "VQA2.0/images/516114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any leafy green in the dish?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.203125, "loss": 3.3621909618377686 }, { "id": "identity_283081", "image": "IconQA/images/59393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of flowers?\nChoices:\n(A) 53\n(B) 52\n(C) 54" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.25, "loss": 3.1421053409576416 }, { "id": "identity_55311", "image": "FigureQA/images/89411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Aqua have the lowest starting value on the y-axis?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.453125, "loss": 3.151038408279419 }, { "id": "identity_307672", "image": "VizWiz/images/VizWiz_train_00006138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this item?" }, { "from": "gpt", "value": "The answer is rug" } ], "clip_score": 28.5, "loss": 3.8133792877197266 }, { "id": "identity_43959", "image": "FigureQA/images/93238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Medium Blue line above the Blue line at x-axis value 80?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.0, "loss": 3.363924503326416 }, { "id": "identity_160720", "image": "Geometry3K/images/749.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If P R \\parallel W X, W X = 10, X Y = 6, W Y = 8, R Y = 5, and P S = 3, find the length of P Y" }, { "from": "gpt", "value": "The answer is 3.75" } ], "clip_score": 27.421875, "loss": 2.036496162414551 }, { "id": "identity_89049", "image": "UniGeo/images/1398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle BCD is 50 degrees, what is the measure of angle ACD?" }, { "from": "gpt", "value": "The answer is 140 degrees" } ], "clip_score": 35.125, "loss": 1.6587061882019043 }, { "id": "identity_235955", "image": "A-OKVQA/images/199883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which person is in the lane designated for bicycles, and what color is their shirt?" }, { "from": "gpt", "value": "The answer is blue shirt" } ], "clip_score": 25.921875, "loss": 3.4730474948883057 }, { "id": "identity_337825", "image": "IconQA/images/68489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on?\nChoices:\n(A) yellow\n(B) white" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.4375, "loss": 4.090951442718506 }, { "id": "identity_220109", "image": "PMC-VQA/images/PMC5360966_fig2_143865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the lesions compared to the surrounding lung tissue?" }, { "from": "gpt", "value": "The answer is Denser than surrounding lung tissue" } ], "clip_score": 26.90625, "loss": 2.153242349624634 }, { "id": "identity_228844", "image": "FigureQA/images/48399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Pale Green section, and how does it compare to the percentages of the Dark Turquoise and Gold sections?" }, { "from": "gpt", "value": "The answer is Pale Green has the highest percentage" } ], "clip_score": 43.96875, "loss": 3.374260663986206 }, { "id": "identity_277836", "image": "CLEVR-Math/images/CLEVR_train_026844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you subtract 0 red blocks, how many objects remain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 17.953125, "loss": 2.023622751235962 }, { "id": "identity_326303", "image": "PMC-VQA/images/PMC6607307_gch2201800040-fig-0003_3410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material shown in the SEM images?\nChoices:\n(A) Metal\n(B) Plastic\n(C) Glass\n(D) Wood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.15625, "loss": 4.1870903968811035 }, { "id": "identity_163826", "image": "Geometry3K/images/2047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of angles Q and S in the parallelogram PQRS?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 36.03125, "loss": 1.392475962638855 }, { "id": "identity_306314", "image": "TQA/images/question_images/cycle_rock_6741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens if you apply heat and pressure to this type of rock?\nChoices:\n(A) Turn to magma\n(B) Turns to sediment\n(C) Turns to sedimentary rock\n(D) Turns to metamorphic rock" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.5625, "loss": 3.6398024559020996 }, { "id": "identity_278728", "image": "Super-CLEVR/images/superCLEVR_new_000120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of objects that share the same material as the large airliner?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.4375, "loss": 2.5478060245513916 }, { "id": "identity_129421", "image": "PMC-VQA/images/PMC5265199_Fig3_118746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the fracture shown in the image displaced or non-displaced?" }, { "from": "gpt", "value": "The answer is Non-displaced" } ], "clip_score": 26.15625, "loss": 1.9561437368392944 }, { "id": "identity_155585", "image": "ScienceQA/images/14177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which U.S. state is this landmark located?" }, { "from": "gpt", "value": "The answer is Arizona" } ], "clip_score": 26.625, "loss": 2.689256429672241 }, { "id": "identity_3924", "image": "FigureQA/images/16315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.125, "loss": 4.27904748916626 }, { "id": "identity_323934", "image": "IconQA/images/101049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this shape?\nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.140625, "loss": 4.370543479919434 }, { "id": "identity_235307", "image": "AI2D/images/632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the path magma takes from the magma chamber to the ash cloud, naming all the structures it passes through in order" }, { "from": "gpt", "value": "The answer is magma chamber, main vent/conduit, crater, ash cloud" } ], "clip_score": 39.625, "loss": 1.6107919216156006 }, { "id": "identity_255246", "image": "IconQA/images/92856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of picking a black marble, and if you pick two marbles without looking, what is the probability that both are black?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 30.09375, "loss": 2.5041420459747314 }, { "id": "identity_29045", "image": "TQA/images/abc_question_images/earth_day_night_12744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the Arctic Circle?\nChoices:\n(A) H\n(B) F\n(C) T\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.078125, "loss": 4.266484260559082 }, { "id": "identity_72310", "image": "ScienceQA/images/12931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the pine tree is responsible for the production of pollen?" }, { "from": "gpt", "value": "The answer is male cone" } ], "clip_score": 35.3125, "loss": 2.6494102478027344 }, { "id": "identity_169738", "image": "IconQA/images/43466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the time is 30 minutes earlier, what would be the time?" }, { "from": "gpt", "value": "The answer is 2:00" } ], "clip_score": 26.203125, "loss": 1.5534635782241821 }, { "id": "identity_274483", "image": "Geometry3K/images/1894.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the length of E H if triangle A B C is similar to triangle D E F, B G is an altitude of triangle A B C, E H is an altitude of triangle D E F, B G = 3, B F = 4, F C = 2, and C E = 1. \nOptions:\n(A) 1\n(B) 3/2\n(C) 2\n(D) 3" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.3125, "loss": 2.4059994220733643 }, { "id": "identity_270442", "image": "DocVQA/images/lzvg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the date of the Agenda?" }, { "from": "gpt", "value": "The answer is november 20, 1975" } ], "clip_score": 22.0625, "loss": 2.072460412979126 }, { "id": "identity_52492", "image": "DVQA/images/bar_train_00057222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have an accuracy of 5 or more on the 'hair' dataset?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.375, "loss": 2.2362279891967773 }, { "id": "identity_184690", "image": "PMC-VQA/images/PMC7732650_F2_319768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in age between the patient in the third and fifth MRI images?" }, { "from": "gpt", "value": "The answer is 8 years" } ], "clip_score": 33.21875, "loss": 2.410046339035034 }, { "id": "identity_38403", "image": "ChartQA/images/OECD_FOREIGN-BORN_UNEMPLOYMENT_IRL_NZL_SVK_SWE_USA_000028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which year recorded the highest foreign born unemployment in Slovak Republic?" }, { "from": "gpt", "value": "The answer is 2011" } ], "clip_score": 31.484375, "loss": 1.724893569946289 }, { "id": "identity_11209", "image": "UniGeo/images/2954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,弦CD丄AB,∠CAB=20°,则∠BOD等于()\nChoices:\n(A) 30°\n(B) 40°\n(C) 45°\n(D) 50°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 2.5823745727539062 }, { "id": "identity_32927", "image": "A-OKVQA/images/294714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Approximately how many people live in this city?\nChoices:\n(A) 2 million\n(B) 1 billion\n(C) 9 million\n(D) 6 million" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.59375, "loss": 3.0157153606414795 }, { "id": "identity_26046", "image": "IconQA/images/96409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Rick's afternoon picnic is almost over. His watch shows the time. What time is it?\nChoices:\n(A) 4:00 A.M.\n(B) 4:00 P.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.34375, "loss": 3.613973617553711 }, { "id": "identity_315774", "image": "TextVQA/images/17b881993b128853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time?" }, { "from": "gpt", "value": "The answer is 3:10" } ], "clip_score": 17.875, "loss": 2.0001578330993652 }, { "id": "identity_318628", "image": "ScienceQA/images/2403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the state of the air inside a tire: solid, liquid, or gas?\nChoices:\n(A) a solid\n(B) a liquid\n(C) a gas" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.453125, "loss": 3.870694398880005 }, { "id": "identity_111718", "image": "A-OKVQA/images/466882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the court?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.09375, "loss": 3.5250277519226074 }, { "id": "identity_16027", "image": "GeoQA+/images/5220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,AD是∠BAC的平分线,延长AD至E,使AD=DE,连接BE,若AB=3AC,△BDE的面积为9,则△ABC的面积是()\nChoices:\n(A) 6\n(B) 9\n(C) 12\n(D) 15" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.515625, "loss": 2.568620204925537 }, { "id": "identity_134663", "image": "DVQA/images/bar_train_00155441.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy value for the \"feel\" dataset on the \"heart\" algorithm?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.78125, "loss": 3.037893533706665 }, { "id": "identity_268288", "image": "Geometry3K/images/1018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of z.\nOptions:\n(A) 12\n(B) 13\n(C) 14\n(D) 15" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.671875, "loss": 2.8837456703186035 }, { "id": "identity_228503", "image": "FigureQA/images/17620.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sum of Khaki and Light Green less than Crimson?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 37.0625, "loss": 3.900006055831909 }, { "id": "identity_171510", "image": "IconQA/images/61513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove one purple marble, what fraction of the remaining marbles will be black?" }, { "from": "gpt", "value": "The answer is 1/5" } ], "clip_score": 35.96875, "loss": 1.5196738243103027 }, { "id": "identity_294253", "image": "PMC-VQA/images/PMC4094594_F1_305571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to capture the image: Computed Tomography (CT) scan, Magnetic Resonance Imaging (MRI), X-ray, or Ultrasound?\nChoices:\n(A) Computed Tomography (CT) scan \n(B) Magnetic Resonance Imaging (MRI) \n(C) X-ray \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.984375, "loss": 3.676271915435791 }, { "id": "identity_84047", "image": "GeoQA+/images/4712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AC is the diameter of the circle O and ∠BAC = 35°, what is the measure of ∠BOC?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 35.375, "loss": 2.1487057209014893 }, { "id": "identity_168858", "image": "TabMWP/images/1819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times did the number of rounds in the spelling bee stay the same from one year to the next?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 38.15625, "loss": 2.7457497119903564 }, { "id": "identity_175279", "image": "IconQA/images/4285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total area of the green shape in square units?" }, { "from": "gpt", "value": "The answer is 16 square units" } ], "clip_score": 23.765625, "loss": 1.7130481004714966 }, { "id": "identity_26984", "image": "AI2D/images/389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the above food web diagram, what cause kingfisher to increase\nChoices:\n(A) decrease in fish\n(B) decrease in water boatman\n(C) increase in fish\n(D) increase in algae" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.6875, "loss": 3.5360076427459717 }, { "id": "identity_16157", "image": "UniGeo/images/2725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,在▱ABCD中,AB=10cm,AB边上的高DH=4cm,BC=6cm,求BC边上的高DF的长()\nChoices:\n(A) 15cm\n(B) 10cm\n(C) 17cm\n(D) \\frac{20}{3}cm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.015625, "loss": 2.371485471725464 }, { "id": "identity_195612", "image": "A-OKVQA/images/161871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the barrel the elephant is standing on?" }, { "from": "gpt", "value": "The answer is Stars" } ], "clip_score": 27.359375, "loss": 3.103344440460205 }, { "id": "identity_194363", "image": "TextVQA/images/0e88544270a85d06.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of shoe is the scorpion in?" }, { "from": "gpt", "value": "The answer is KEEN" } ], "clip_score": 30.515625, "loss": 2.863755941390991 }, { "id": "identity_152789", "image": "AI2D/images/2737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which phase of the moon is shown as a half black and half white circle in the bottom row?" }, { "from": "gpt", "value": "The answer is Last Quarter" } ], "clip_score": 33.40625, "loss": 2.7178380489349365 }, { "id": "identity_311955", "image": "TextVQA/images/4037c8b4f1d31156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this place?" }, { "from": "gpt", "value": "The answer is busch stadium" } ], "clip_score": 35.34375, "loss": 2.7327442169189453 }, { "id": "identity_24079", "image": "A-OKVQA/images/316441.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the man kneeling on the ground?\nChoices:\n(A) he fell\n(B) dancing\n(C) praying\n(D) repairing something" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.953125, "loss": 3.5040276050567627 }, { "id": "identity_244522", "image": "GeoQA+/images/1656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,连接OA、OC,⊙O的半径R=2,sinB=\\frac{3}{4},若弦AC的中点为M,求OM的长。" }, { "from": "gpt", "value": "The answer is \\(\\frac{\\sqrt{7}}{2}\\)" } ], "clip_score": 29.046875, "loss": 2.308781623840332 }, { "id": "identity_102087", "image": "TabMWP/images/24229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you want to watch a movie that ends before 3:00 P.M., which options do you have?" }, { "from": "gpt", "value": "The answer is The Fairy Tale Princess, The Crystal Ball of Fortune, Cartoon Penguin Adventure, The True Story of a Championship Team" } ], "clip_score": 29.046875, "loss": 0.7928375005722046 }, { "id": "identity_119470", "image": "VQA2.0/images/310006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the airplanes trailing smoke?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.609375, "loss": 3.0200886726379395 }, { "id": "identity_223032", "image": "A-OKVQA/images/178078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the make of the motorcycle?" }, { "from": "gpt", "value": "The answer is Honda" } ], "clip_score": 24.015625, "loss": 2.9887924194335938 }, { "id": "identity_181171", "image": "TabMWP/images/29187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Molly need to buy 3 train tickets to Seattle?" }, { "from": "gpt", "value": "The answer is $2,826.00" } ], "clip_score": 28.25, "loss": 1.7227638959884644 }, { "id": "identity_315268", "image": "GeoQA+/images/907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,C岛在某岛的北偏东45°方向,C岛在另一岛的北偏西25°方向,则从C岛看这两岛的视角∠ACB的度数是()\n选项:\n(A) 70°\n(B) 20°\n(C) 35°\n(D) 110°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.34375, "loss": 2.675919532775879 }, { "id": "identity_204178", "image": "PMC-VQA/images/PMC5609013_Fig2_218491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible hardware, such as screws or plates, in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.296875, "loss": 3.0998482704162598 }, { "id": "identity_252738", "image": "A-OKVQA/images/79869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many key chains are visible in the image, and what other items are present on the desk besides the key chains?" }, { "from": "gpt", "value": "The answer is 7 key chains, laptop, telephone, mug, mouse, mouse pad, papers, pen holder, plant, decorative items" } ], "clip_score": 22.703125, "loss": 2.361504316329956 }, { "id": "identity_84834", "image": "Geometry3K/images/1487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle ABC?" }, { "from": "gpt", "value": "The answer is 33.124 inches" } ], "clip_score": 33.625, "loss": 2.182894706726074 }, { "id": "identity_57631", "image": "DocVQA/images/xggn0226_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the patient's last name as per the form?" }, { "from": "gpt", "value": "The answer is Not legible" } ], "clip_score": 23.65625, "loss": 4.249909400939941 }, { "id": "identity_212876", "image": "A-OKVQA/images/257940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest?" }, { "from": "gpt", "value": "The answer is Evening" } ], "clip_score": 18.328125, "loss": 3.313436508178711 }, { "id": "identity_329619", "image": "DVQA/images/bar_train_00189522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of all the values in this group?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 18.984375, "loss": 2.4021060466766357 }, { "id": "identity_212648", "image": "A-OKVQA/images/67116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the couch?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 21.625, "loss": 4.018221855163574 }, { "id": "identity_148715", "image": "ChartQA/images/two_col_102588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage decrease in the risk index score from 2016 to 2018?" }, { "from": "gpt", "value": "The answer is 20.39%" } ], "clip_score": 27.34375, "loss": 1.9029186964035034 }, { "id": "identity_219750", "image": "A-OKVQA/images/70033.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible window in the room?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 16.828125, "loss": 2.9197163581848145 }, { "id": "identity_74948", "image": "ScienceQA/images/17187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object in the image is not a food item?" }, { "from": "gpt", "value": "The answer is Tree bark" } ], "clip_score": 20.15625, "loss": 2.383600950241089 }, { "id": "identity_233893", "image": "A-OKVQA/images/64481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two objects the woman is carrying, and what are their dominant colors?" }, { "from": "gpt", "value": "The answer is Skateboard (orange), stuffed animal (blue)" } ], "clip_score": 28.375, "loss": 2.104564905166626 }, { "id": "identity_55987", "image": "DocVQA/images/rmbv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What edition of the International Congress is being advertised?" }, { "from": "gpt", "value": "The answer is 9th" } ], "clip_score": 19.109375, "loss": 2.208588123321533 }, { "id": "identity_308381", "image": "GeoQA+/images/1827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线AB、CD相交于点O.OE垂直于AB,∠DOB=60°,则∠EOC的大小为()\n选项:\n(A) 30°\n(B) 60°\n(C) 120°\n(D) 150°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.234375, "loss": 2.6047377586364746 }, { "id": "identity_163721", "image": "GeoQA+/images/5156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle HEF in the quadrilateral EFGH?" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 33.53125, "loss": 1.9727362394332886 }, { "id": "identity_199955", "image": "A-OKVQA/images/175047.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many workers are visible in the construction area?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 23.328125, "loss": 4.4955525398254395 }, { "id": "identity_215857", "image": "PMC-VQA/images/PMC6206383_Fig24_387536.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to produce this head CT image?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT) scan" } ], "clip_score": 33.25, "loss": 1.6330522298812866 }, { "id": "identity_11608", "image": "PMC-VQA/images/PMC7660331_sensors-20-06016-f002_296172.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image on the upper right a view of? \nChoices:\n(A) the full 3D image \n(B) a front view \n(C) a side view \n(D) a cross sectional view " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.734375, "loss": 3.9160029888153076 }, { "id": "identity_90060", "image": "GeoQA+/images/1794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BOD in the given circle?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 35.34375, "loss": 1.7624034881591797 }, { "id": "identity_226441", "image": "FigureQA/images/82212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest value of Dark Cyan and the lowest value of Medium Mint?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 28.09375, "loss": 2.442962646484375 }, { "id": "identity_208700", "image": "VQA-AS/images/17248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is to the right of the image?" }, { "from": "gpt", "value": "The answer is Dog" } ], "clip_score": 15.40625, "loss": 4.031216144561768 }, { "id": "identity_63535", "image": "ScienceQA/images/13126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of yellow particles in both solutions combined?" }, { "from": "gpt", "value": "The answer is 13 particles" } ], "clip_score": 29.125, "loss": 3.3514161109924316 }, { "id": "identity_262540", "image": "DVQA/images/bar_train_00063228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the entire chart, which group of bars has the lowest value for an individual bar?" }, { "from": "gpt", "value": "The answer is get" } ], "clip_score": 27.75, "loss": 4.738230228424072 }, { "id": "identity_242445", "image": "A-OKVQA/images/465835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras with stripes are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.8125, "loss": 2.092548131942749 }, { "id": "identity_161916", "image": "GeoQA+/images/2576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If OC is the angle bisector of ∠AOB, and DP is perpendicular to OA at point P, what is the length of OC if OD = 4?" }, { "from": "gpt", "value": "The answer is OC = 4 * OA / OP" } ], "clip_score": 35.5625, "loss": 2.5013186931610107 }, { "id": "identity_91152", "image": "Geometry3K/images/324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radius of the circumscribed circle?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 29.71875, "loss": 1.5932810306549072 }, { "id": "identity_10562", "image": "ScienceQA/images/18713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal's limbs are also adapted for climbing trees?\nChoices:\n(A) chimpanzee\n(B) red-necked wallaby" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.921875, "loss": 3.879335880279541 }, { "id": "identity_135598", "image": "ChartQA/images/multi_col_1312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between 16 to 24-year-olds and the total population that access music through copyright infringement?" }, { "from": "gpt", "value": "The answer is 11%" } ], "clip_score": 29.28125, "loss": 2.04475474357605 }, { "id": "identity_284803", "image": "FigureQA/images/91155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Dark Orchid or Light Slate? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.7265625, "loss": 3.8802685737609863 }, { "id": "identity_217426", "image": "A-OKVQA/images/94132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheeled vehicles are visible on the play mat?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.234375, "loss": 2.081156015396118 }, { "id": "identity_30873", "image": "PMC-VQA/images/PMC5991105_Fig1_325435.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the abnormality shown on the three-dimensional CT scan?\nChoices:\n(A) Abnormal bronchus \n(B) Abnormal pulmonary vein \n(C) Abnormal artery \n(D) Abnormal bronchiole" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.546875, "loss": 3.694849729537964 }, { "id": "identity_77364", "image": "TQA/images/abc_question_images/parts_neuron_17227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label that points to the part of the neuron where the nucleus is located?" }, { "from": "gpt", "value": "The answer is S" } ], "clip_score": 32.28125, "loss": 3.1962008476257324 }, { "id": "identity_56939", "image": "FigureQA/images/19362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the pie chart divided into equal sections?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.78125, "loss": 3.424294948577881 }, { "id": "identity_257322", "image": "TabMWP/images/37211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of minutes Colin spent practicing tennis over the past 5 days, and how does this compare to the median number of minutes?" }, { "from": "gpt", "value": "The answer is Average is 96.4, median is 96" } ], "clip_score": 31.78125, "loss": 2.4789559841156006 }, { "id": "identity_229966", "image": "VQA2.0/images/390893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the people holding and what are they wearing on their feet?" }, { "from": "gpt", "value": "The answer is Surfboards and wetsuit boots" } ], "clip_score": 29.390625, "loss": 1.952677845954895 }, { "id": "identity_88770", "image": "GeoQA+/images/4185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle CMD if AB is parallel to CD?" }, { "from": "gpt", "value": "The answer is 88 degrees" } ], "clip_score": 32.625, "loss": 2.894822359085083 }, { "id": "identity_76400", "image": "TQA/images/question_images/human_system_nervous_6185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which lobe is located at the front of the brain?" }, { "from": "gpt", "value": "The answer is frontal lobe" } ], "clip_score": 29.703125, "loss": 2.2952232360839844 }, { "id": "identity_11024", "image": "DocVQA/images/qmjb0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Weight for Male of Age 60-69?" }, { "from": "gpt", "value": "The answer is 168.6" } ], "clip_score": 19.625, "loss": 2.0607495307922363 }, { "id": "identity_329334", "image": "A-OKVQA/images/6998.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the person riding their bike?\nChoices:\n(A) sidewalk\n(B) street\n(C) boardwalk\n(D) bike lane" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.921875, "loss": 4.014144420623779 }, { "id": "identity_118539", "image": "PMC-VQA/images/PMC3274479_F2_124818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a progression of the highlighted area from the first to the last image?" }, { "from": "gpt", "value": "The answer is No visible progression" } ], "clip_score": 25.890625, "loss": 3.828620672225952 }, { "id": "identity_140677", "image": "FigureQA/images/52042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between Light Salmon and Orange?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 18.671875, "loss": 2.257505178451538 }, { "id": "identity_115495", "image": "VQA2.0/images/151307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is displayed on the clock tower?" }, { "from": "gpt", "value": "The answer is 4:30" } ], "clip_score": 24.734375, "loss": 1.6539857387542725 }, { "id": "identity_211773", "image": "A-OKVQA/images/358296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people wearing red jackets in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.3359375, "loss": 3.1997992992401123 }, { "id": "identity_82257", "image": "GeoQA+/images/3428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point C divides line segment AB into a ratio of 1:3, and point D is the midpoint of AB, what is the combined length of segments AC and CD if AB is 8 units long?" }, { "from": "gpt", "value": "The answer is 4 units" } ], "clip_score": 29.453125, "loss": 1.9914385080337524 }, { "id": "identity_321737", "image": "A-OKVQA/images/113757.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of space could this be called?\nChoices:\n(A) podium\n(B) amphitheater\n(C) stadium\n(D) enclave" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.0625, "loss": 5.380048751831055 }, { "id": "identity_261811", "image": "DocVQA/images/nkgv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide information about the Page?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 14.3515625, "loss": 3.449707269668579 }, { "id": "identity_132549", "image": "TextVQA/images/1cdbcc76107e4e19.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the batter's helmet?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.6875, "loss": 3.3810641765594482 }, { "id": "identity_92491", "image": "IconQA/images/35542.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any shapes that are not pentagons?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.59375, "loss": 2.954366683959961 }, { "id": "identity_117424", "image": "A-OKVQA/images/215170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the devices on the table?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 27.328125, "loss": 3.303154706954956 }, { "id": "identity_76958", "image": "TQA/images/abc_question_images/nuclear_energy_18105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'C' represent in the diagram?" }, { "from": "gpt", "value": "The answer is Coal storage tower" } ], "clip_score": 23.859375, "loss": 3.8957080841064453 }, { "id": "identity_50544", "image": "DocVQA/images/nzmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the document?" }, { "from": "gpt", "value": "The answer is Robert E. Shank" } ], "clip_score": 27.96875, "loss": 2.021174430847168 }, { "id": "identity_318896", "image": "AI2D/images/936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many beaks are visible?\nChoices:\n(A) 3\n(B) 1\n(C) 2\n(D) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.03125, "loss": 3.7141571044921875 }, { "id": "identity_117009", "image": "VQA2.0/images/228434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the chair in front of the desk?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.90625, "loss": 3.2441773414611816 }, { "id": "identity_241605", "image": "A-OKVQA/images/161738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the city shown in the image and list three distinct architectural or cultural landmarks visible in the image that support your identification" }, { "from": "gpt", "value": "The answer is Moscow, arch structure, banners, VDNKh park" } ], "clip_score": 28.71875, "loss": 3.9130215644836426 }, { "id": "identity_104539", "image": "A-OKVQA/images/576652.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the pitcher's uniform?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.953125, "loss": 3.416125774383545 }, { "id": "identity_81643", "image": "Geometry3K/images/581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of \\angle 1, \\angle 2, and \\angle 3?" }, { "from": "gpt", "value": "The answer is 76°" } ], "clip_score": 32.21875, "loss": 2.161107063293457 }, { "id": "identity_102361", "image": "TabMWP/images/28987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which book is the least expensive?" }, { "from": "gpt", "value": "The answer is book about Asia" } ], "clip_score": 31.3125, "loss": 3.6503045558929443 }, { "id": "identity_182911", "image": "VQA-AS/images/14808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the fish in the pond?" }, { "from": "gpt", "value": "The answer is Orange and white" } ], "clip_score": 31.15625, "loss": 3.4534127712249756 }, { "id": "identity_228796", "image": "TextVQA/images/b1935c9505498f45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the corporation that produces this beer and in which city is it located?" }, { "from": "gpt", "value": "The answer is Singha Corporation Co., Ltd., Bangkok" } ], "clip_score": 40.9375, "loss": 2.030985116958618 }, { "id": "identity_231058", "image": "TextVQA/images/0dcbcc3547b05310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different brands are advertised on the scoreboard and the fence combined, and what are they?" }, { "from": "gpt", "value": "The answer is 17 brands: Verizon Wireless, Capital One Bank, Coca-Cola, Team LSU, Super Chevy Dealers, LSU sports.net, People's Health, Powerade, LSU, LSU TAF, LUBA, The Advocate, Cintas, Louisiana Lottery, Campus Federal, Kentwood Springs, ING" } ], "clip_score": 26.171875, "loss": 2.003950357437134 }, { "id": "identity_32064", "image": "TabMWP/images/20001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An ice cream shop kept track of how many ice cream cones it sold in the past 4 days. On which day did the shop sell the most ice cream cones?\nChoices:\n(A) Saturday\n(B) Sunday\n(C) Monday\n(D) Tuesday" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.0, "loss": 4.109150409698486 }, { "id": "identity_324691", "image": "IconQA/images/84609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image shows a train. What is the sixth image?\nChoices:\n(A) train\n(B) plane\n(C) bear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.3125, "loss": 3.931676149368286 }, { "id": "identity_134385", "image": "DVQA/images/bar_train_00152484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the least accuracy on the 'stem' dataset?" }, { "from": "gpt", "value": "The answer is break" } ], "clip_score": 30.265625, "loss": 3.569558620452881 }, { "id": "identity_240474", "image": "VQA2.0/images/498204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting in the front row and how many people are standing behind the front row?" }, { "from": "gpt", "value": "The answer is 2 sitting, 5 standing" } ], "clip_score": 13.984375, "loss": 2.403597831726074 }, { "id": "identity_89096", "image": "UniGeo/images/20.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the sector AOC?" }, { "from": "gpt", "value": "The answer is Approximately 61.11 square units" } ], "clip_score": 30.75, "loss": 2.4062907695770264 }, { "id": "identity_325083", "image": "TabMWP/images/12198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the table, what was the rate of change between these two days?" }, { "from": "gpt", "value": "The answer is -6" } ], "clip_score": 25.09375, "loss": 3.538496255874634 }, { "id": "identity_202406", "image": "VQA2.0/images/316194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the place written on the bus?" }, { "from": "gpt", "value": "The answer is Fairhaven Village" } ], "clip_score": 25.578125, "loss": 2.296942949295044 }, { "id": "identity_93272", "image": "IconQA/images/30799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the black line were extended to the 8th small division, what would its new length be in millimeters?" }, { "from": "gpt", "value": "The answer is 7 millimeters" } ], "clip_score": 28.875, "loss": 1.8917887210845947 }, { "id": "identity_7949", "image": "TQA/images/abc_question_images/cycle_carbon_15009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does CO2 go after it is in the air?\nChoices:\n(A) R DEAD ORGANISMS AND WASTE\n(B) E FACTORY AND VEHICLE EMISSIONS\n(C) C ANIMAL RESPIRATION\n(D) J PHOTOSYNTHESIS" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.59375, "loss": 3.8584542274475098 }, { "id": "identity_260933", "image": "A-OKVQA/images/495626.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street near the pedestrian crossing sign, and what is the color of the traffic light for the cars at that moment?" }, { "from": "gpt", "value": "The answer is 7 people, red traffic light" } ], "clip_score": 17.84375, "loss": 3.0147409439086914 }, { "id": "identity_79417", "image": "Geometry3K/images/124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If polygon BCDE is congruent to polygon RSTU, find the value of z" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.21875, "loss": 2.5093860626220703 }, { "id": "identity_290929", "image": "TextVQA/images/605191b3e49690ec.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is printed on the shirts of the TV crew?" }, { "from": "gpt", "value": "The answer is utc" } ], "clip_score": 15.359375, "loss": 4.485696792602539 }, { "id": "identity_334509", "image": "Super-CLEVR/images/superCLEVR_new_000727.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the matte minivan that matches the color of the utility bike?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 28.953125, "loss": 3.5278022289276123 }, { "id": "identity_161320", "image": "GeoQA+/images/1722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the triangle ABC, if D, E, and F are the midpoints of BC, AD, and CE respectively, and the area of triangle ABC is 28 cm², what is the length of segment EF if the length of AC is 8 cm?" }, { "from": "gpt", "value": "The answer is 4 cm" } ], "clip_score": 34.125, "loss": 1.734836459159851 }, { "id": "identity_100271", "image": "IconQA/images/83967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are there in total?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.0, "loss": 1.7474452257156372 }, { "id": "identity_134296", "image": "ChartQA/images/multi_col_604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years did the 65 years and older group have a percentage higher than 5%?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 28.546875, "loss": 2.383230447769165 }, { "id": "identity_207855", "image": "A-OKVQA/images/363845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.75, "loss": 3.0911970138549805 }, { "id": "identity_150834", "image": "ScienceQA/images/10629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what is the approximate size of the mineral? (Assuming an average hand size for scale)" }, { "from": "gpt", "value": "The answer is Small, approximately 2-3 inches in diameter" } ], "clip_score": 25.125, "loss": 2.659313678741455 }, { "id": "identity_247591", "image": "PMC-VQA/images/PMC6701321_fig6_25844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cystic lesions are present in the proximal and midhumerus, and what is their relative size compared to each other?" }, { "from": "gpt", "value": "The answer is Two cystic lesions, proximal larger than midhumerus" } ], "clip_score": 27.203125, "loss": 2.126504421234131 }, { "id": "identity_217697", "image": "A-OKVQA/images/326168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What message is advertised on the back of the bus?" }, { "from": "gpt", "value": "The answer is Every Lane is a Bike Lane" } ], "clip_score": 31.203125, "loss": 2.7266201972961426 }, { "id": "identity_321454", "image": "PMC-VQA/images/PMC8064425_FIG3_423587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow in this image show?\nChoices:\n(A) Parasagittal gyriform hyperintensity with bilateral distribution \n(B) Parasagittal gyriform hypointensity with unilateral distribution \n(C) Circumscribed hyperintensity in cerebellum \n(D) Normal brain structure." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.34375, "loss": 4.006871700286865 }, { "id": "identity_205385", "image": "TextVQA/images/72b438e4e1b8d1f6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ovens are installed in this kitchen?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.8125, "loss": 4.272459983825684 }, { "id": "identity_256306", "image": "DocVQA/images/gkpn0081_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of all quality tests combined for the week ending 2/8/91?" }, { "from": "gpt", "value": "The answer is 600%" } ], "clip_score": 22.484375, "loss": 2.2480368614196777 }, { "id": "identity_236981", "image": "ScienceQA/images/12331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Jenny's test shows that adding a specific nutrient helps the bacteria produce 20% more insulin, how would you design an experiment to determine the optimal concentration of this nutrient for maximum insulin production, and what would be the expected outcome if the optimal concentration is found?" }, { "from": "gpt", "value": "The answer is The optimal concentration of the nutrient for maximum insulin production" } ], "clip_score": 22.3125, "loss": 2.046231269836426 }, { "id": "identity_72974", "image": "TQA/images/question_images/ocean_waves_7120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the time it takes for a wave crest at point A to reach point B?" }, { "from": "gpt", "value": "The answer is Wave Period" } ], "clip_score": 35.9375, "loss": 2.058302402496338 }, { "id": "identity_27911", "image": "GeoQA+/images/1355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB为⊙O的直径,点C,D在⊙O上.若∠CAB=36°,则∠D的度数为()\nChoices:\n(A) 72°\n(B) 54°\n(C) 45°\n(D) 36°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.65625, "loss": 2.6138577461242676 }, { "id": "identity_3061", "image": "PMC-VQA/images/PMC6894034_f2_79200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the aneurysm located in the patient's body? \nChoices:\n(A) In the thoracic aorta \n(B) In the abdominal aorta \n(C) In the femoral artery \n(D) In the carotid artery " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.171875, "loss": 4.318395614624023 }, { "id": "identity_38780", "image": "DocVQA/images/jfgf0077_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the amount of compensation listed for the City Activity Council?" }, { "from": "gpt", "value": "The answer is $5,000" } ], "clip_score": 23.296875, "loss": 1.6855064630508423 }, { "id": "identity_271366", "image": "UniGeo/images/1799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, in circle O, if ∠ABC = 51°, then ∠AOC is equal to which of the following angles?\n(A) 51°\n(B) 80°\n(C) 90°\n(D) 102°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.09375, "loss": 2.9238202571868896 }, { "id": "identity_120693", "image": "A-OKVQA/images/384659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.421875, "loss": 2.3005151748657227 }, { "id": "identity_294223", "image": "IconQA/images/48312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the line in centimeters when measured to the nearest centimeter using the ruler?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 33.25, "loss": 3.0149104595184326 }, { "id": "identity_237433", "image": "GeoQA+/images/695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB为⊙O的直径,C为⊙O上一点,弦AD平分∠BAC,交弦BC于点E,CD=4,DE=2,若⊙O的半径为5,则BC的长为多少?" }, { "from": "gpt", "value": "The answer is \\( 2(\\sqrt{2} + 1) \\)" } ], "clip_score": 31.3125, "loss": 2.409191370010376 }, { "id": "identity_63206", "image": "AI2D/images/1468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label for the outermost layer of the Earth in the image?" }, { "from": "gpt", "value": "The answer is Crust" } ], "clip_score": 26.578125, "loss": 2.8614399433135986 }, { "id": "identity_298138", "image": "TabMWP/images/4704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the information provided by Mrs. Finley regarding the number of rounds in the spelling bee in previous years, what was the rate of change between 2012 and 2013?" }, { "from": "gpt", "value": "The answer is -3" } ], "clip_score": 39.25, "loss": 3.369192361831665 }, { "id": "identity_298324", "image": "ChartQA/images/OECD_ICT_GOODS_EXPORTS_CAN_CZE_FRA_NLD_ZAF_000024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What country does the blue color line represent?" }, { "from": "gpt", "value": "The answer is Canada" } ], "clip_score": 17.234375, "loss": 2.9531989097595215 }, { "id": "identity_54050", "image": "FigureQA/images/34835.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 29.875, "loss": 3.941912889480591 }, { "id": "identity_240631", "image": "UniGeo/images/3425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,DC是弦,若∠BDC=31°,则∠DAC的度数等于多少?" }, { "from": "gpt", "value": "The answer is 59°" } ], "clip_score": 30.296875, "loss": 2.059678316116333 }, { "id": "identity_54745", "image": "DocVQA/images/sqdl0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the amount listed under \"Addendum\" in the cost section?" }, { "from": "gpt", "value": "The answer is $2,871" } ], "clip_score": 22.828125, "loss": 2.705580949783325 }, { "id": "identity_301527", "image": "FigureQA/images/30469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the amount of Gold smaller than the amount of Steel Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.15625, "loss": 3.8740830421447754 }, { "id": "identity_49061", "image": "DocVQA/images/xjhl0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the chart number mentioned in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.484375, "loss": 2.5797338485717773 }, { "id": "identity_110258", "image": "TextVQA/images/dff055dff5d8b847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible vehicles in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.875, "loss": 3.0476651191711426 }, { "id": "identity_123313", "image": "A-OKVQA/images/208731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the towel on the sand closest to the bottom of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 14.2578125, "loss": 4.062364101409912 }, { "id": "identity_118246", "image": "VQA2.0/images/413115.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles can be seen in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 13.9296875, "loss": 2.204852819442749 }, { "id": "identity_75356", "image": "AI2D/images/3047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many structures are labeled in the eukaryote cell as per the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 30.453125, "loss": 3.913806438446045 }, { "id": "identity_328215", "image": "TextVQA/images/0085245e85b9815d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do these white letters say?" }, { "from": "gpt", "value": "The answer is what a waste! " } ], "clip_score": 27.65625, "loss": 4.014028072357178 }, { "id": "identity_247689", "image": "IconQA/images/90242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Mandy starts making eggs at the time shown on the clock. If it takes her 15 minutes to prepare the ingredients and 10 minutes to cook the eggs, what time will it be when she finishes cooking?" }, { "from": "gpt", "value": "The answer is 6:25 A.M" } ], "clip_score": 26.765625, "loss": 2.1143455505371094 }, { "id": "identity_152148", "image": "ScienceQA/images/17108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What characteristic of the rock in the image indicates it can float on water?" }, { "from": "gpt", "value": "The answer is High porosity with air pockets" } ], "clip_score": 30.3125, "loss": 2.9683430194854736 }, { "id": "identity_267581", "image": "TabMWP/images/6398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money Tanvi needs to purchase 5 topaz bracelets and 6 silver necklaces?" }, { "from": "gpt", "value": "The answer is 8,970" } ], "clip_score": 31.140625, "loss": 2.2714779376983643 }, { "id": "identity_61307", "image": "ChartQA/images/13512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Dem and Rep in 2006?" }, { "from": "gpt", "value": "The answer is 26%" } ], "clip_score": 19.546875, "loss": 2.1192429065704346 }, { "id": "identity_284490", "image": "PMC-VQA/images/PMC7669910_F3_299358.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging modality was utilized to generate the processed FLAIR images? Was it:\n(A) CT scan\n(B) PET scan\n(C) MRI scan\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.859375, "loss": 3.915663242340088 }, { "id": "identity_129246", "image": "A-OKVQA/images/99423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the dog's collar?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.953125, "loss": 3.840684175491333 }, { "id": "identity_46480", "image": "DVQA/images/bar_train_00161699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the 'half' category across all groups?" }, { "from": "gpt", "value": "The answer is 3.67" } ], "clip_score": 24.53125, "loss": 1.8238139152526855 }, { "id": "identity_72145", "image": "ScienceQA/images/1998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state borders Lake Michigan to the east?" }, { "from": "gpt", "value": "The answer is Michigan" } ], "clip_score": 23.171875, "loss": 3.133979320526123 }, { "id": "identity_220870", "image": "VQA2.0/images/139983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person wearing gloves or mittens?" }, { "from": "gpt", "value": "The answer is Gloves" } ], "clip_score": 17.71875, "loss": 2.705471992492676 }, { "id": "identity_119218", "image": "VizWiz/images/VizWiz_train_00005551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first letter of the word that is partially visible on the left side of the image?" }, { "from": "gpt", "value": "The answer is N" } ], "clip_score": 18.15625, "loss": 3.3670482635498047 }, { "id": "identity_49738", "image": "DocVQA/images/skfd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Donald R. Calvert, Ph.D" } ], "clip_score": 21.96875, "loss": 3.0643181800842285 }, { "id": "identity_74550", "image": "AI2D/images/3325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which structure in the image is responsible for producing ATP?" }, { "from": "gpt", "value": "The answer is Mitochondrion" } ], "clip_score": 19.90625, "loss": 1.3727176189422607 }, { "id": "identity_298824", "image": "GeoQA+/images/1000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, ABCD is a quadrilateral with AC and BD as diagonals. If BC = 3 and the height from BC is 2, what is the area of the shaded region?\nChoices:\n(A) 3\n(B) 6\n(C) 12\n(D) 24" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.453125, "loss": 2.5129294395446777 }, { "id": "identity_173176", "image": "IconQA/images/98187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of yellow circles in the image?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 34.9375, "loss": 2.095412254333496 }, { "id": "identity_134215", "image": "DocVQA/images/sljf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the email from PJRUPPEL addressed to besides PJDANIEL?" }, { "from": "gpt", "value": "The answer is RUPPEL, PATRICIA" } ], "clip_score": 25.921875, "loss": 1.9817111492156982 }, { "id": "identity_238958", "image": "Geometry3K/images/1485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the perimeter of the figure. Round to the nearest tenth if necessary" }, { "from": "gpt", "value": "The answer is 41.1 inches" } ], "clip_score": 31.078125, "loss": 2.247002601623535 }, { "id": "identity_299013", "image": "GeoQA+/images/3594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, it is known that points B, C, and D lie on circle A, with angle CBD being twice angle BDC. If angle BAC is 44°, what is the measure of angle CAD?\n(A) 68°\n(B) 88°\n(C) 90°\n(D) 112°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.3125, "loss": 2.317077398300171 }, { "id": "identity_7293", "image": "DocVQA/images/jzhd0227_28.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page no mentioned in this document?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.28125, "loss": 2.975726366043091 }, { "id": "identity_27240", "image": "MapQA/images/map_11254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which states have the highest value in the USA ?" }, { "from": "gpt", "value": "The answer is Rhode Island" } ], "clip_score": 33.4375, "loss": 2.3783700466156006 }, { "id": "identity_185685", "image": "A-OKVQA/images/386853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more phones on cardboard or directly on the ground?" }, { "from": "gpt", "value": "The answer is Cardboard" } ], "clip_score": 26.734375, "loss": 2.290602207183838 }, { "id": "identity_181182", "image": "TabMWP/images/19026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount Brody saved by not spending $10 on lunch each day?" }, { "from": "gpt", "value": "The answer is $15" } ], "clip_score": 35.84375, "loss": 2.451070547103882 }, { "id": "identity_139043", "image": "DVQA/images/bar_train_00194108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between 'thing' and 'dirt'?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.09375, "loss": 2.604290723800659 }, { "id": "identity_187525", "image": "TextVQA/images/152b1e329b8b71d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number written on the side of the ship near the top?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 25.125, "loss": 2.628882884979248 }, { "id": "identity_320315", "image": "TQA/images/question_images/parts_fish_2917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many kinds of fins are depicted?\nChoices:\n(A) 5\n(B) 3\n(C) 4\n(D) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.9375, "loss": 3.0894336700439453 }, { "id": "identity_148511", "image": "DocVQA/images/zhhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many reviews were received according to the records?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.421875, "loss": 4.594903945922852 }, { "id": "identity_77860", "image": "AI2D/images/4607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common name of the tree depicted in the image?" }, { "from": "gpt", "value": "The answer is White Ash" } ], "clip_score": 33.6875, "loss": 2.9595565795898438 }, { "id": "identity_21671", "image": "DocVQA/images/fmmp0227_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Quantity for Trade Date 04-19-77?" }, { "from": "gpt", "value": "The answer is 500" } ], "clip_score": 21.53125, "loss": 2.5407092571258545 }, { "id": "identity_180182", "image": "TabMWP/images/26274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long does the reptile show last?" }, { "from": "gpt", "value": "The answer is 2 hours 20 minutes" } ], "clip_score": 22.875, "loss": 1.8709213733673096 }, { "id": "identity_333217", "image": "DocVQA/images/zqdw0217_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the daily dose taken by all patients for a cycle of 28 days?" }, { "from": "gpt", "value": "The answer is 0.625 mg CE" } ], "clip_score": 21.34375, "loss": 2.5275256633758545 }, { "id": "identity_253965", "image": "PMC-VQA/images/PMC5471892_Fig4_179489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the histological features observed in the left humerus of the Maltese dog, and how do they compare to the histological features of the right tibia?" }, { "from": "gpt", "value": "The answer is Left humerus has denser bone matrix than right tibia" } ], "clip_score": 24.609375, "loss": 2.859914779663086 }, { "id": "identity_312764", "image": "UniGeo/images/2839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,线段AB是圆O的直径,弦CD垂直于AB,如果∠BOC=70°,那么∠BAD等于多少?\n选项:\n(A) 20°\n(B) 30°\n(C) 35°\n(D) 70°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.96875, "loss": 2.387362241744995 }, { "id": "identity_213984", "image": "PMC-VQA/images/PMC4016293_F2_287402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any text or labels besides the letter 'G'?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.609375, "loss": 3.128525495529175 }, { "id": "identity_194077", "image": "TextVQA/images/78d8b7071cde5fbf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images contain the word \"SALES\" in English?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.71875, "loss": 2.5663368701934814 }, { "id": "identity_326764", "image": "Super-CLEVR/images/superCLEVR_new_000709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the matte thing that is the same color as the metal vehicle?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 24.234375, "loss": 4.101490020751953 }, { "id": "identity_10269", "image": "PMC-VQA/images/PMC4159571_Fig1_319149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 2 Question: In which column are the areas indicated? \nChoices:\n(A) Left panel \n(B) Right panel \n(C) Middle panel \n(D) Full panel " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.75, "loss": 4.179572105407715 }, { "id": "identity_275170", "image": "IconQA/images/35745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which activity did Lily do first on Tuesday evening, eating dinner or watching a movie, as shown on the clocks?\nChoices:\n(A) ate dinner\n(B) watched a movie" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.875, "loss": 3.5761783123016357 }, { "id": "identity_49880", "image": "DVQA/images/bar_train_00169484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference for the object \"ball\" between the categories \"artist\" and \"store\"?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.34375, "loss": 3.2082862854003906 }, { "id": "identity_46811", "image": "DVQA/images/bar_train_00026200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms are compared in this chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 35.03125, "loss": 2.3891141414642334 }, { "id": "identity_183884", "image": "TextVQA/images/d28f9e9f6ae4d3ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the headline topic of the top story in the newspaper?" }, { "from": "gpt", "value": "The answer is Public transport and cars in Singapore" } ], "clip_score": 27.453125, "loss": 3.752455472946167 }, { "id": "identity_320871", "image": "Geometry3K/images/1431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x. Assume that segments that appear to be tangent are tangent.\nOptions:\n(A) 10\n(B) 20\n(C) 45\n(D) 110" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.890625, "loss": 2.944408655166626 }, { "id": "identity_277980", "image": "GEOS/images/030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that in the diagram AC = DC = DB and the measure of angle ACD is 48 degrees, what is the measure of angle B? \n(A) 24 degrees\n(B) 33 degrees\n(C) 48 degrees\n(D) 66 degrees" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.1875, "loss": 2.367523431777954 }, { "id": "identity_311616", "image": "ScienceQA/images/1927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Fill in the blank.\nHydrogen sulfide is ().\nChoices:\n(A) a compound\n(B) an elementary substance" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.921875, "loss": 4.01993465423584 }, { "id": "identity_80656", "image": "Geometry3K/images/1521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of diameter DF is 14, find the circumference of circle A" }, { "from": "gpt", "value": "The answer is 44 (rounded to the nearest whole number)" } ], "clip_score": 31.890625, "loss": 1.430649995803833 }, { "id": "identity_203284", "image": "A-OKVQA/images/205392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light for the vehicles moving in the same direction as the bus?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 29.453125, "loss": 2.836183547973633 }, { "id": "identity_179423", "image": "TabMWP/images/15686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which vegetable received the highest number of votes from upper elementary students?" }, { "from": "gpt", "value": "The answer is Turnips" } ], "clip_score": 40.8125, "loss": 2.5187790393829346 }, { "id": "identity_39268", "image": "FigureQA/images/16315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used in the pie chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 32.21875, "loss": 2.416969060897827 }, { "id": "identity_280153", "image": "TabMWP/images/27537.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Dakota requires to purchase a designer ball gown and a designer watch?" }, { "from": "gpt", "value": "The answer is 18,207" } ], "clip_score": 33.46875, "loss": 2.555241584777832 }, { "id": "identity_57925", "image": "DocVQA/images/fskw0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the invoice?" }, { "from": "gpt", "value": "The answer is 10/5/82" } ], "clip_score": 23.828125, "loss": 2.17268967628479 }, { "id": "identity_140352", "image": "DVQA/images/bar_train_00174447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more people preferred church over match?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.65625, "loss": 2.281780481338501 }, { "id": "identity_333765", "image": "PMC-VQA/images/PMC7592484_f2_284001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lung zones are affected in a patient with this CT pattern? \nChoices:\n(A) upper zones \n(B) middle zones \n(C) lower zones \n(D) all zones" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.9375, "loss": 4.0971269607543945 }, { "id": "identity_26343", "image": "CLEVR-Math/images/CLEVR_train_050477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 6 gray metallic cylinders. How many gray metallic cylinders are left?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 23.578125, "loss": 2.666835069656372 }, { "id": "identity_256189", "image": "PlotQA/images/21143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average debt service across all years shown in the graph?" }, { "from": "gpt", "value": "The answer is 8.56" } ], "clip_score": 29.0625, "loss": 2.171462059020996 }, { "id": "identity_295010", "image": "Geometry3K/images/1604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the degree measure of minor arc AC is x and major arc ADC is y, and angle ABC measures 70 degrees, what is the value of y?\nChoices:\n(A) 110\n(B) 180\n(C) 250\n(D) 270" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.890625, "loss": 2.4066319465637207 }, { "id": "identity_248041", "image": "TabMWP/images/20553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of fish in all tanks that have at least 50 fish but fewer than 90 fish?" }, { "from": "gpt", "value": "The answer is 734" } ], "clip_score": 29.140625, "loss": 2.3224778175354004 }, { "id": "identity_21378", "image": "VQA2.0/images/103352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it a copper spigot?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.71875, "loss": 3.944565534591675 }, { "id": "identity_287670", "image": "VQA2.0/images/385145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are the bicycles located?" }, { "from": "gpt", "value": "The answer is bike rack" } ], "clip_score": 29.25, "loss": 2.261942148208618 }, { "id": "identity_41539", "image": "FigureQA/images/59033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Medium Orchid section represent more than 25% of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 38.3125, "loss": 2.931562900543213 }, { "id": "identity_100332", "image": "TabMWP/images/26576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the patients' wait times at the doctor's office?" }, { "from": "gpt", "value": "The answer is 43 minutes" } ], "clip_score": 35.3125, "loss": 3.053410768508911 }, { "id": "identity_130887", "image": "A-OKVQA/images/480717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 10.0625, "loss": 3.401374101638794 }, { "id": "identity_94365", "image": "IconQA/images/78960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the total value is represented by the first two bars?" }, { "from": "gpt", "value": "The answer is 1/5" } ], "clip_score": 19.578125, "loss": 1.601524829864502 }, { "id": "identity_147697", "image": "DocVQA/images/szcg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who should be contacted for more information regarding the program?" }, { "from": "gpt", "value": "The answer is Dr. Richard M. Krause and Mrs. Snyder - Ext. 467" } ], "clip_score": 23.828125, "loss": 3.0229580402374268 }, { "id": "identity_86711", "image": "UniGeo/images/1649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if AO = 6 and the perimeter of quadrilateral DEFG is 14, what is the length of segment BO?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.15625, "loss": 2.335033416748047 }, { "id": "identity_329169", "image": "VQA2.0/images/8025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are they riding?" }, { "from": "gpt", "value": "The answer is bikes" } ], "clip_score": 26.53125, "loss": 2.9029226303100586 }, { "id": "identity_28944", "image": "PMC-VQA/images/PMC3411310_Fig1_148091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was shown in the lymphadenopathy? \nChoices:\n(A) No calcification \n(B) Moderate calcification \n(C) Fine specks of calcification \n(D) Coarse calcification " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.328125, "loss": 3.8875229358673096 }, { "id": "identity_201727", "image": "VQA2.0/images/24368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing the street in front of the bus?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.765625, "loss": 3.06410813331604 }, { "id": "identity_258589", "image": "VQA2.0/images/119761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the company in the picture and what is the color of the car in the background?" }, { "from": "gpt", "value": "The answer is BROOK LEDGE, INC. and red" } ], "clip_score": 12.359375, "loss": 3.133479595184326 }, { "id": "identity_248196", "image": "A-OKVQA/images/109450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for three coffees and two pastries if the price of one coffee is $0.99 and each pastry costs $1.49?" }, { "from": "gpt", "value": "The answer is $5.95" } ], "clip_score": 13.46875, "loss": 1.5759230852127075 }, { "id": "identity_50745", "image": "DocVQA/images/thcn0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage is recommended for the BMD corresponding to extra risk?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 25.765625, "loss": 1.5005501508712769 }, { "id": "identity_49196", "image": "DocVQA/images/nkgv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cost per site for \"Experience Nutrition\" materials?" }, { "from": "gpt", "value": "The answer is $1,800" } ], "clip_score": 21.0, "loss": 1.838964819908142 }, { "id": "identity_7826", "image": "ScienceQA/images/20981.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can Brennan and Ed trade to each get what they want?\nChoices:\n(A) Brennan can trade his tomatoes for Ed's broccoli.\n(B) Brennan can trade his tomatoes for Ed's carrots.\n(C) Ed can trade his broccoli for Brennan's oranges.\n(D) Ed can trade his almonds for Brennan's tomatoes." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.859375, "loss": 3.1620047092437744 }, { "id": "identity_247340", "image": "ChartQA/images/5976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined total number of Iraqi and Afghan citizens resettled in the U.S. under special immigrant visa programs for the top three states?" }, { "from": "gpt", "value": "The answer is 35,263" } ], "clip_score": 31.0, "loss": 2.2672784328460693 }, { "id": "identity_28162", "image": "GeoQA+/images/1875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,CD是Rt△ABC斜边AB上的高,AC=8,BC=6,则\\cos∠BCD的值是()\nChoices:\n(A) \\frac{3}{5}\n(B) \\frac{3}{4}\n(C) \\frac{4}{3}\n(D) \\frac{4}{5}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.21875, "loss": 2.25213623046875 }, { "id": "identity_161776", "image": "Geometry3K/images/509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle JKL?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 37.90625, "loss": 1.7740321159362793 }, { "id": "identity_128418", "image": "PMC-VQA/images/PMC6922538_F2_87534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the lesion in relation to the organ?" }, { "from": "gpt", "value": "The answer is One-fourth to one-third of the organ's width" } ], "clip_score": 22.90625, "loss": 2.2891628742218018 }, { "id": "identity_169418", "image": "IconQA/images/94422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hearts in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.65625, "loss": 2.4678573608398438 }, { "id": "identity_176943", "image": "TabMWP/images/29442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each shopping bag costs $20, how much would 6 shopping bags cost?" }, { "from": "gpt", "value": "The answer is $120" } ], "clip_score": 29.296875, "loss": 1.2162106037139893 }, { "id": "identity_76692", "image": "AI2D/images/2002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of hawks decreased, which organism would likely increase in population?" }, { "from": "gpt", "value": "The answer is Mice" } ], "clip_score": 20.15625, "loss": 2.767354965209961 }, { "id": "identity_191945", "image": "PMC-VQA/images/PMC4513678_Fig3_409055.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What medical imaging technique is depicted in this image?" }, { "from": "gpt", "value": "The answer is Angiogram" } ], "clip_score": 30.421875, "loss": 2.8314788341522217 }, { "id": "identity_45063", "image": "MapQA/images/map_7834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the map include data for Alaska and Hawaii?" }, { "from": "gpt", "value": "The answer is Yes, they are included as insets" } ], "clip_score": 25.59375, "loss": 3.421525239944458 }, { "id": "identity_227128", "image": "PlotQA/images/2802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of male and female labour force over the years, and how does the difference between these averages compare?" }, { "from": "gpt", "value": "The answer is Average male: 60%, Average female: 50%, Difference: 10%" } ], "clip_score": 24.84375, "loss": 1.6283880472183228 }, { "id": "identity_9289", "image": "PMC-VQA/images/PMC6375059_Fig16_435904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ is affected by the mass extension? \nChoices:\n(A) Right Kidney \n(B) Left kidney \n(C) Liver \n(D) Spleen " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 4.265416145324707 }, { "id": "identity_15802", "image": "AI2D/images/969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the colour of flower A?\nChoices:\n(A) red\n(B) pink\n(C) yellow\n(D) blue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.921875, "loss": 4.770744323730469 }, { "id": "identity_71983", "image": "TQA/images/question_images/hydrocarbons_9119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bond is present between the carbon atoms in the saturated hydrocarbon?" }, { "from": "gpt", "value": "The answer is Single bonds" } ], "clip_score": 35.1875, "loss": 2.525707483291626 }, { "id": "identity_5652", "image": "VQA-RAD/images/synpic60543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was probably found incidentally on this image?" }, { "from": "gpt", "value": "The answer is cavum vergae" } ], "clip_score": 28.53125, "loss": 2.9641380310058594 }, { "id": "identity_334847", "image": "ScienceQA/images/10592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Kansas City\n(B) Bismarck\n(C) Jefferson City\n(D) Billings" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.609375, "loss": 4.123322010040283 }, { "id": "identity_36147", "image": "VQA2.0/images/355481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are there?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 23.796875, "loss": 2.222078561782837 }, { "id": "identity_334338", "image": "TabMWP/images/23982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals consumed more than 4 bananas last week according to the chef's records?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 36.125, "loss": 2.25909161567688 }, { "id": "identity_158442", "image": "ScienceQA/images/6555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scientific classification order of this bird?" }, { "from": "gpt", "value": "The answer is Ciconiiformes" } ], "clip_score": 27.6875, "loss": 1.6414134502410889 }, { "id": "identity_131070", "image": "Super-CLEVR/images/superCLEVR_new_001452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have two wheels?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.1875, "loss": 2.459787130355835 }, { "id": "identity_225229", "image": "GeoQA+/images/2475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,PA、PB切⊙O于点A、B,直线FG切⊙O于点E,交PA于F,交PB于点G,若PA=8cm,且FG=4cm,则△PFG的面积是多少?" }, { "from": "gpt", "value": "The answer is 8√2 cm²" } ], "clip_score": 26.765625, "loss": 1.7476139068603516 }, { "id": "identity_88184", "image": "GeoQA+/images/5511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,∠C=35°,则∠ACB的度数为()" }, { "from": "gpt", "value": "The answer is 55°" } ], "clip_score": 33.25, "loss": 1.9509356021881104 }, { "id": "identity_254601", "image": "DocVQA/images/qghv0228_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long is the entire session from the start of the Product Approval Process to the end of the Adjournment?" }, { "from": "gpt", "value": "The answer is 3 hours" } ], "clip_score": 21.234375, "loss": 2.104698896408081 }, { "id": "identity_308553", "image": "PMC-VQA/images/PMC7968257_Fig3_393535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging sequence was used to capture this? \nChoices:\n(A) T1 sequence \n(B) T2 sequence \n(C) DWI sequence \n(D) FLAIR sequence" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.53125, "loss": 3.850816249847412 }, { "id": "identity_244621", "image": "TabMWP/images/12259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average rate of change in the number of boxes of cookies sold per day from Tuesday to Saturday?" }, { "from": "gpt", "value": "The answer is 1.75 boxes per day" } ], "clip_score": 35.0625, "loss": 1.6286062002182007 }, { "id": "identity_202367", "image": "A-OKVQA/images/513670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the kite closest to the ground?" }, { "from": "gpt", "value": "The answer is Red and blue" } ], "clip_score": 24.015625, "loss": 3.5615286827087402 }, { "id": "identity_77349", "image": "AI2D/images/1994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organisms in the soil food web are directly dependent on plants for their nutrition?" }, { "from": "gpt", "value": "The answer is Nematodes (Root-feeders), Fungi (Mycorrhizal fungi and Saprophytic fungi), and Bacteria" } ], "clip_score": 32.0, "loss": 1.514045000076294 }, { "id": "identity_28795", "image": "A-OKVQA/images/54335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be seen by looking through the clock?\nChoices:\n(A) boats\n(B) field\n(C) city\n(D) ocean" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.234375, "loss": 4.258110046386719 }, { "id": "identity_231117", "image": "UniGeo/images/1025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若AB是⊙O的直径,CD是⊙O的弦,∠ABD=58°,求∠CAD的度数。" }, { "from": "gpt", "value": "The answer is 26°" } ], "clip_score": 31.0625, "loss": 2.4802117347717285 }, { "id": "identity_74256", "image": "TQA/images/abc_question_images/parts_skeleton_17270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the skeleton is labeled with 'Y'?" }, { "from": "gpt", "value": "The answer is Coccyx" } ], "clip_score": 31.234375, "loss": 2.4212801456451416 }, { "id": "identity_230026", "image": "VQA-AS/images/4573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plants are on the mantle, and what is the total number of animals (including toys) in the room?" }, { "from": "gpt", "value": "The answer is Two plants, five animals/toys" } ], "clip_score": 24.625, "loss": 3.3063786029815674 }, { "id": "identity_63533", "image": "TQA/images/question_images/cycle_rock_6741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the process that turns igneous rock into metamorphic rock?" }, { "from": "gpt", "value": "The answer is Heat and pressure" } ], "clip_score": 34.5, "loss": 2.0087270736694336 }, { "id": "identity_68680", "image": "AI2D/images/1784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are present in the Antarctic food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.59375, "loss": 4.064418792724609 }, { "id": "identity_277568", "image": "DocVQA/images/gxwg0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what timeframe did Roger Smith (M.D.) serve as a Medical Registrar at U.C.H. under Dr. Stokes and Dr. Prankerd?" }, { "from": "gpt", "value": "The answer is 1959-1961" } ], "clip_score": 26.453125, "loss": 1.4004275798797607 }, { "id": "identity_51296", "image": "PlotQA/images/126936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of trademark applications made by Lao PDR in the given years?" }, { "from": "gpt", "value": "The answer is an integer" } ], "clip_score": 22.515625, "loss": 4.048461437225342 }, { "id": "identity_45744", "image": "FigureQA/images/69019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the Orange line at the 50 mark on the x-axis?" }, { "from": "gpt", "value": "The answer is 58" } ], "clip_score": 25.21875, "loss": 2.9154415130615234 }, { "id": "identity_53029", "image": "FigureQA/images/97246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Lime Green consistently above 80 on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.828125, "loss": 3.19539213180542 }, { "id": "identity_239591", "image": "Super-CLEVR/images/superCLEVR_new_000408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relative size of the metal object behind the motorbike that is on the left side of the big object, which is on the right side of the small red rubber thing, compared to the yellow bus?" }, { "from": "gpt", "value": "The answer is Smaller than the yellow bus" } ], "clip_score": 29.140625, "loss": 2.3416666984558105 }, { "id": "identity_325830", "image": "VQA2.0/images/148019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is it located?" }, { "from": "gpt", "value": "The answer is tree" } ], "clip_score": 19.90625, "loss": 3.9707589149475098 }, { "id": "identity_140134", "image": "FigureQA/images/78097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Dark Periwinkle?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.03125, "loss": 3.067016363143921 }, { "id": "identity_29243", "image": "TabMWP/images/9229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Ruth buys 5 kilograms of green Anjou pears, 5 kilograms of Concorde pears, and 2 kilograms of Bartlett pears. What is the total cost?" }, { "from": "gpt", "value": "The answer is 11.54" } ], "clip_score": 31.890625, "loss": 2.405207872390747 }, { "id": "identity_218157", "image": "A-OKVQA/images/162717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanterns are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.375, "loss": 2.3553731441497803 }, { "id": "identity_5266", "image": "PMC-VQA/images/PMC5989222_f0001_324804.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging test is recommended to detect metastases from large-cell neuroendocrine carcinoma of the cervix in the chest area? \nChoices:\n(A) CT scan without contrast \n(B) MRI of the chest with gadolinium \n(C) PET scan \n(D) Chest X-ray " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.5, "loss": 4.315730094909668 }, { "id": "identity_218143", "image": "VQA2.0/images/448274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What religious symbol is displayed above the trophy case?" }, { "from": "gpt", "value": "The answer is A cross" } ], "clip_score": 20.453125, "loss": 3.359337091445923 }, { "id": "identity_195135", "image": "A-OKVQA/images/326210.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the water bottle on the desk?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.75, "loss": 3.182682752609253 }, { "id": "identity_174738", "image": "IconQA/images/27631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What transformation has been applied to the letter 'P' to get the letter 'd'?" }, { "from": "gpt", "value": "The answer is Rotation and Reflection" } ], "clip_score": 25.59375, "loss": 2.7941887378692627 }, { "id": "identity_144767", "image": "DocVQA/images/gjdw0228_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What department is the sender affiliated with?" }, { "from": "gpt", "value": "The answer is Dept. of Biochemistry" } ], "clip_score": 20.90625, "loss": 2.8299736976623535 }, { "id": "identity_234812", "image": "AI2D/images/4368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the nut labeled E and describe its position relative to the other nuts in the diagram" }, { "from": "gpt", "value": "The answer is Hazelnut is in the top row, third from the left" } ], "clip_score": 31.78125, "loss": 2.353029251098633 }, { "id": "identity_218082", "image": "PMC-VQA/images/PMC7693669_children-07-00199-f002_306734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of Doppler echocardiography is depicted in the image?" }, { "from": "gpt", "value": "The answer is Color flow Doppler echocardiography" } ], "clip_score": 31.484375, "loss": 1.8482476472854614 }, { "id": "identity_198880", "image": "PMC-VQA/images/PMC5019880_fig1_72290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of midline shift?" }, { "from": "gpt", "value": "The answer is No midline shift" } ], "clip_score": 23.375, "loss": 3.1419811248779297 }, { "id": "identity_8226", "image": "A-OKVQA/images/207486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the animal currently sitting?\nChoices:\n(A) street\n(B) park\n(C) boardwalk\n(D) beach" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.421875, "loss": 4.42257022857666 }, { "id": "identity_109112", "image": "VizWiz/images/VizWiz_train_00014820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the lens on the object?" }, { "from": "gpt", "value": "The answer is Round" } ], "clip_score": 27.09375, "loss": 3.883281707763672 }, { "id": "identity_85724", "image": "GeoQA+/images/3009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given the circle O with diameter AB perpendicular to chord CD at E, and angle BOC measuring 48 degrees, what is the measure of angle CED?" }, { "from": "gpt", "value": "The answer is 42 degrees" } ], "clip_score": 37.78125, "loss": 1.8936482667922974 }, { "id": "identity_90823", "image": "GeoQA+/images/4641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABD?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 31.796875, "loss": 1.7949624061584473 }, { "id": "identity_93179", "image": "CLEVR-Math/images/CLEVR_train_059213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more spheres or cubes in the image?" }, { "from": "gpt", "value": "The answer is Cubes" } ], "clip_score": 28.71875, "loss": 2.542621374130249 }, { "id": "identity_326026", "image": "TextVQA/images/0d401ff381cbf336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which companies are being advertised here?" }, { "from": "gpt", "value": "The answer is sanyo samsung" } ], "clip_score": 31.46875, "loss": 2.186572551727295 }, { "id": "identity_333183", "image": "MapQA/images/map_6989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have the lowest value in the USA?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.015625, "loss": 2.5117688179016113 }, { "id": "identity_182141", "image": "A-OKVQA/images/365618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the vehicle in the image moving or parked?" }, { "from": "gpt", "value": "The answer is Moving" } ], "clip_score": 22.265625, "loss": 2.6242682933807373 }, { "id": "identity_14873", "image": "IconQA/images/104582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long." }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 34.5625, "loss": 2.3365142345428467 }, { "id": "identity_47592", "image": "ChartQA/images/40350636009248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the DALYs rate for Sierra Leone?" }, { "from": "gpt", "value": "The answer is 1,661" } ], "clip_score": 17.71875, "loss": 2.3183224201202393 }, { "id": "identity_69531", "image": "ScienceQA/images/16331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean is to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is Pacific Ocean" } ], "clip_score": 25.671875, "loss": 2.7897298336029053 }, { "id": "identity_307560", "image": "PMC-VQA/images/PMC7000761_Fig1_111094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this guiding curve used for? \nChoices:\n(A) To resample images \n(B) To visualize the brain \n(C) To straighten the spinal cord \n(D) To cover the lumbar spinal cord" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5, "loss": 4.4052205085754395 }, { "id": "identity_96495", "image": "TabMWP/images/3820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hedgehogs in 2 cages and 3 cages combined?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 33.875, "loss": 1.9728853702545166 }, { "id": "identity_75820", "image": "ScienceQA/images/246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of forest ecosystem is depicted in the image?" }, { "from": "gpt", "value": "The answer is Deciduous forest" } ], "clip_score": 25.984375, "loss": 2.8122315406799316 }, { "id": "identity_329396", "image": "DocVQA/images/jxpj0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When was Diet Coke introduced in the United States?" }, { "from": "gpt", "value": "The answer is 1982" } ], "clip_score": 19.78125, "loss": 1.178652048110962 }, { "id": "identity_153089", "image": "ScienceQA/images/9585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 25.171875, "loss": 2.7560577392578125 }, { "id": "identity_135028", "image": "DVQA/images/bar_train_00086184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined value of 'essay' and 'attack' for 'tail'?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 22.59375, "loss": 2.5097122192382812 }, { "id": "identity_319036", "image": "A-OKVQA/images/161738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city are the signs showing love for?\nChoices:\n(A) manchester\n(B) moscow\n(C) milan\n(D) manila" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.75, "loss": 3.9517836570739746 }, { "id": "identity_231770", "image": "IconQA/images/22976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 32.4375, "loss": 4.461332321166992 }, { "id": "identity_286111", "image": "PMC-VQA/images/PMC2828401_F4_57868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific feature is being examined in the CCLT rabbit model of OA in the radiographic atlas? Is it:\n(A) Medial meniscus tears,\n(B) Lateral femoral condyles osteophytes,\n(C) Patellar dislocation, or\n(D) Medial femoral condyles osteophytes?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.0, "loss": 4.392931938171387 }, { "id": "identity_9205", "image": "GeoQA+/images/2371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,ABCD为⊙O的内接四边形,E为AB上一点,∠CBE=50°,则∠AOC=()\nChoices:\n(A) 25°\n(B) 50°\n(C) 100°\n(D) 130°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.359375, "loss": 2.8398444652557373 }, { "id": "identity_138863", "image": "DocVQA/images/jfhb0018_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total travel time from Winston-Salem to Teterboro including layover in D.C.?" }, { "from": "gpt", "value": "The answer is 2 hours 3 minutes" } ], "clip_score": 22.296875, "loss": 2.3061423301696777 }, { "id": "identity_153541", "image": "ScienceQA/images/10730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 26.1875, "loss": 2.81215763092041 }, { "id": "identity_90398", "image": "Geometry3K/images/1447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of angle KMN" }, { "from": "gpt", "value": "The answer is 140 degrees" } ], "clip_score": 31.84375, "loss": 2.0244364738464355 }, { "id": "identity_205945", "image": "VQA2.0/images/265173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street signs are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.953125, "loss": 4.51474666595459 }, { "id": "identity_200918", "image": "A-OKVQA/images/213506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of child safety equipment is attached to the nearest bicycle?" }, { "from": "gpt", "value": "The answer is Child bike seat" } ], "clip_score": 24.53125, "loss": 2.605807304382324 }, { "id": "identity_20377", "image": "ScienceQA/images/15914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which better describes the Scarborough Marsh ecosystem?\nChoices:\n(A) It has land that is covered with water during most of the year. It also has soil that is poor in nutrients.\n(B) It has soil that is rich in nutrients. It also has other water ecosystems nearby." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.8125, "loss": 4.564202785491943 }, { "id": "identity_224111", "image": "TextVQA/images/2110eb6f3e41b08a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the country of the orchestra and list the three composers mentioned on the album cover" }, { "from": "gpt", "value": "The answer is Argentina, Piazzolla, Bragato, Arizaga" } ], "clip_score": 27.96875, "loss": 1.534597396850586 }, { "id": "identity_59043", "image": "MapQA/images/map_9890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have the highest range of farmers, 63-66?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 37.78125, "loss": 2.406548023223877 }, { "id": "identity_235311", "image": "FigureQA/images/53769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the Pale Green and Royal Blue sections?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 35.03125, "loss": 1.832237958908081 }, { "id": "identity_162220", "image": "UniGeo/images/3306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle B is 65 degrees, what is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is 25 degrees" } ], "clip_score": 35.84375, "loss": 2.1103758811950684 }, { "id": "identity_135062", "image": "DVQA/images/bar_train_00031006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value for the 'hay' group across all categories?" }, { "from": "gpt", "value": "The answer is 66.67" } ], "clip_score": 23.453125, "loss": 1.7066484689712524 }, { "id": "identity_14561", "image": "MapQA/images/map_502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Maryland ?" }, { "from": "gpt", "value": "The answer is 638-723" } ], "clip_score": 23.296875, "loss": 2.4409499168395996 }, { "id": "identity_186260", "image": "VQA2.0/images/327324.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface are the vase and flowers placed on?" }, { "from": "gpt", "value": "The answer is A countertop or table" } ], "clip_score": 27.9375, "loss": 3.4026124477386475 }, { "id": "identity_297174", "image": "A-OKVQA/images/397186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which two individuals are being evaluated in the judgment? \nChoices:\n(A) dog dog\n(B) woman\n(C) man man\n(D) dog man" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.53125, "loss": 3.9491984844207764 }, { "id": "identity_78037", "image": "AI2D/images/3254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Nucleolus\"?" }, { "from": "gpt", "value": "The answer is Produces and assembles ribosomes" } ], "clip_score": 26.484375, "loss": 2.281261682510376 }, { "id": "identity_53441", "image": "ChartQA/images/two_col_20554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest segment in the pie chart?" }, { "from": "gpt", "value": "The answer is Totally disagree 7%" } ], "clip_score": 33.78125, "loss": 2.084275960922241 }, { "id": "identity_165367", "image": "Geometry3K/images/650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x if the sum of the measures of the angles in the circle is 360°?" }, { "from": "gpt", "value": "The answer is 10.6" } ], "clip_score": 31.90625, "loss": 2.5619444847106934 }, { "id": "identity_78521", "image": "ScienceQA/images/15362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the food web is a primary consumer?" }, { "from": "gpt", "value": "The answer is Brown lemming, barren-ground caribou, earthworm" } ], "clip_score": 30.96875, "loss": 2.237574815750122 }, { "id": "identity_168926", "image": "TabMWP/images/4932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in Duncan's available funds from the end of June to the end of July?" }, { "from": "gpt", "value": "The answer is -$3.30" } ], "clip_score": 26.53125, "loss": 2.2229838371276855 }, { "id": "identity_289846", "image": "A-OKVQA/images/20251.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options can be utilized for transporting multiple luggage bags? \n(A) servants\n(B) people\n(C) carousel\n(D) luggage cart" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.34375, "loss": 3.9173998832702637 }, { "id": "identity_301662", "image": "DocVQA/images/kzhm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is this event?" }, { "from": "gpt", "value": "The answer is SATURDAY, OCTOBER 18, 1975" } ], "clip_score": 17.875, "loss": 1.3734526634216309 }, { "id": "identity_57283", "image": "DocVQA/images/ymbv0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the reception?" }, { "from": "gpt", "value": "The answer is September 5" } ], "clip_score": 18.1875, "loss": 3.0082130432128906 }, { "id": "identity_327105", "image": "DocVQA/images/hffm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much for Dinner - Daniel F. O'Keefe?" }, { "from": "gpt", "value": "The answer is 14.90" } ], "clip_score": 23.9375, "loss": 1.9276304244995117 }, { "id": "identity_135187", "image": "DVQA/images/bar_train_00054876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sum of 'matter' values for 'novel' and 'sort' greater than the 'line' value for 'harbor'?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.953125, "loss": 3.0545780658721924 }, { "id": "identity_158592", "image": "TQA/images/question_images/erosion_6845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two types of beach systems depicted in the image?" }, { "from": "gpt", "value": "The answer is Active Beach System and Storm Beach" } ], "clip_score": 32.59375, "loss": 2.5570132732391357 }, { "id": "identity_75899", "image": "AI2D/images/2107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of energy for the Arctic Hare in this food web?" }, { "from": "gpt", "value": "The answer is Grass" } ], "clip_score": 34.8125, "loss": 3.2019126415252686 }, { "id": "identity_53300", "image": "FigureQA/images/56320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Turquoise sector represent less than 30% of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 33.96875, "loss": 3.180006980895996 }, { "id": "identity_248181", "image": "Super-CLEVR/images/superCLEVR_new_000346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are either shiny objects in front of the tiny matte double bus or gray shiny tandem bikes, and how many of these objects are also planes?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 32.15625, "loss": 3.0401175022125244 }, { "id": "identity_131423", "image": "A-OKVQA/images/459997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus shown in the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 20.59375, "loss": 3.311192274093628 }, { "id": "identity_104380", "image": "VQA2.0/images/64392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many potted plants are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.53125, "loss": 4.4163665771484375 }, { "id": "identity_90327", "image": "UniGeo/images/400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠AOB and ∠COD are supplementary and AB = 2, CD = 4, what is the length of the arc AC?" }, { "from": "gpt", "value": "The answer is The length of arc AC is the sum of the lengths of arcs AB and BC" } ], "clip_score": 32.15625, "loss": 1.6778044700622559 }, { "id": "identity_265242", "image": "PMC-VQA/images/PMC4698516_F1_458379.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the structure labeled 'a' in image C. Is it the tip of the leaf, submarginal scissor, base of the leaf, or marginal scissor?\nChoices:\n(A) Tip of the leaf \n(B) Submarginal scissor \n(C) Base of the leaf \n(D) Marginal scissor." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.765625, "loss": 3.938498020172119 }, { "id": "identity_71833", "image": "AI2D/images/782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many steps are shown in the nitrogen cycle within this diagram?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 18.671875, "loss": 4.137836456298828 }, { "id": "identity_188632", "image": "VQA2.0/images/578805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plates are visible on the table?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.03125, "loss": 2.2521274089813232 }, { "id": "identity_124306", "image": "PMC-VQA/images/PMC3474213_fig4_160836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level of this histological image?" }, { "from": "gpt", "value": "The answer is The magnification level is not specified" } ], "clip_score": 32.6875, "loss": 3.0173680782318115 }, { "id": "identity_143039", "image": "FigureQA/images/49672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the Deep Sky Blue bar?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 26.671875, "loss": 2.426097869873047 }, { "id": "identity_300436", "image": "DVQA/images/bar_train_00182755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of people favor the object work in this category?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 29.015625, "loss": 2.5365374088287354 }, { "id": "identity_88246", "image": "GeoQA+/images/1925.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠C in triangle ABC is 40°, what is the measure of ∠CAB in the circle O?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 39.03125, "loss": 1.6810976266860962 }, { "id": "identity_206469", "image": "A-OKVQA/images/91994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the person?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 16.546875, "loss": 3.502519130706787 }, { "id": "identity_32184", "image": "VQA2.0/images/11682.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of animal is shown?" }, { "from": "gpt", "value": "The answer is giraffe" } ], "clip_score": 30.328125, "loss": 2.512232542037964 }, { "id": "identity_233074", "image": "VQA2.0/images/233637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles (including bicycles, cars, and horse-drawn carriages) in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.671875, "loss": 2.236335515975952 }, { "id": "identity_113165", "image": "TextVQA/images/c1b24f8d6c47efed.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of drink is advertised at the price of 3 Euros on the menu board?" }, { "from": "gpt", "value": "The answer is Glühwein" } ], "clip_score": 24.28125, "loss": 2.8625552654266357 }, { "id": "identity_177969", "image": "IconQA/images/96752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you were to outline the entire image with paper clips, how many paper clips would you need?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.375, "loss": 1.9306379556655884 }, { "id": "identity_113509", "image": "A-OKVQA/images/533553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is the building in the background?" }, { "from": "gpt", "value": "The answer is Stilt house" } ], "clip_score": 22.40625, "loss": 3.6742613315582275 }, { "id": "identity_200417", "image": "VizWiz/images/VizWiz_train_00011096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the surface the scissors are resting on?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 33.125, "loss": 2.5980148315429688 }, { "id": "identity_209118", "image": "PMC-VQA/images/PMC6334890_FIG1_423706.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is suggested by the fusion of colors in the images?" }, { "from": "gpt", "value": "The answer is Positron Emission Tomography (PET) combined with CT or MRI" } ], "clip_score": 31.875, "loss": 1.6635977029800415 }, { "id": "identity_35115", "image": "Super-CLEVR/images/superCLEVR_new_000423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the object that is the same color as the small rubber biplane?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 22.71875, "loss": 4.064677715301514 }, { "id": "identity_231653", "image": "Geometry3K/images/624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that \\( m \\angle 1 = 58^\\circ \\), \\( m \\angle 2 = 47^\\circ \\), and \\( m \\angle 3 = 26^\\circ \\), find the measure of \\( \\angle 9 \\)" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 26.5, "loss": 2.4832653999328613 }, { "id": "identity_280856", "image": "TextVQA/images/4353e86154a1ad14.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the jersey number of the man in front?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 16.484375, "loss": 1.8981424570083618 }, { "id": "identity_136005", "image": "DVQA/images/bar_train_00041253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented in the legend of the graph?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.953125, "loss": 2.7590789794921875 }, { "id": "identity_93588", "image": "IconQA/images/33465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many inches does the twig extend beyond the 3 inch mark on the ruler?" }, { "from": "gpt", "value": "The answer is 1.5 inches" } ], "clip_score": 38.46875, "loss": 1.545474886894226 }, { "id": "identity_208893", "image": "PMC-VQA/images/PMC2981854_F0002_78516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any vascular structures in the given CT image?" }, { "from": "gpt", "value": "The answer is Aorta and inferior vena cava visible" } ], "clip_score": 29.890625, "loss": 2.611751079559326 }, { "id": "identity_104220", "image": "A-OKVQA/images/2525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of one cupcake?" }, { "from": "gpt", "value": "The answer is £1.00" } ], "clip_score": 27.796875, "loss": 1.6184613704681396 }, { "id": "identity_9401", "image": "ScienceQA/images/17953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Oklahoma?\nChoices:\n(A) Oklahoma City\n(B) Tallahassee\n(C) Jefferson City\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.546875, "loss": 4.239870071411133 }, { "id": "identity_42982", "image": "FigureQA/images/88762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the Olive line at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 19.46875, "loss": 2.515347719192505 }, { "id": "identity_167327", "image": "TabMWP/images/5541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of tractors owned by the farmers?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.6875, "loss": 2.2866451740264893 }, { "id": "identity_196472", "image": "TextVQA/images/6e2de61ddf6937a1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the author of the book shown in the image?" }, { "from": "gpt", "value": "The answer is Ludmila Romportlová" } ], "clip_score": 34.28125, "loss": 2.606637954711914 }, { "id": "identity_134638", "image": "PlotQA/images/136481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of ores and metals exported to Latin America (developing only) in 1964?" }, { "from": "gpt", "value": "The answer is 15%" } ], "clip_score": 26.859375, "loss": 1.9559224843978882 }, { "id": "identity_49396", "image": "DVQA/images/bar_train_00077850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm church for the dataset evil?" }, { "from": "gpt", "value": "The answer is Approximately 2.2" } ], "clip_score": 32.75, "loss": 2.4847116470336914 }, { "id": "identity_241504", "image": "FigureQA/images/23321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Orange Red and Light Seafoam?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 33.65625, "loss": 1.7985384464263916 }, { "id": "identity_274296", "image": "ChartQA/images/11755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what point does the percentage of people who favor democracy reach its highest level?" }, { "from": "gpt", "value": "The answer is 2012" } ], "clip_score": 24.921875, "loss": 1.9191722869873047 }, { "id": "identity_285329", "image": "UniGeo/images/864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB is a chord of circle O, OD is perpendicular to AB with foot C, intersecting circle O at point D, and point E lies on the major arc AB. If ∠AOD = 52°, what is the degree measure of ∠DEB?\nChoices:\n(A) 52°\n(B) 40°\n(C) 26°\n(D) 45°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.1875, "loss": 2.4167113304138184 }, { "id": "identity_336661", "image": "DocVQA/images/yjpg0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this panel?" }, { "from": "gpt", "value": "The answer is Liaison Advisory Panel" } ], "clip_score": 19.46875, "loss": 2.984794855117798 }, { "id": "identity_238814", "image": "TextVQA/images/42a0935bbc9da622.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the film clinic associated with this movie?" }, { "from": "gpt", "value": "The answer is Film Clinic" } ], "clip_score": 10.3203125, "loss": 1.8465282917022705 }, { "id": "identity_110665", "image": "Super-CLEVR/images/superCLEVR_new_001159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the yellow object?" }, { "from": "gpt", "value": "The answer is Dirtbike" } ], "clip_score": 26.484375, "loss": 3.9276411533355713 }, { "id": "identity_285999", "image": "GeoQA+/images/3216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, if circle O is tangent to two sides AE and CD of regular pentagon ABCDE at points A and C, what is the degree measure of angle AOC?\nChoices:\n(A) 144°\n(B) 130°\n(C) 129°\n(D) 108°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.09375, "loss": 2.1744351387023926 }, { "id": "identity_51451", "image": "FigureQA/images/81829.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values on the x-axis for Dim Gray?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 22.078125, "loss": 2.7169811725616455 }, { "id": "identity_213949", "image": "A-OKVQA/images/321185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pedestrians visible in the rearview mirror reflection?" }, { "from": "gpt", "value": "The answer is No pedestrians visible" } ], "clip_score": 22.859375, "loss": 3.202183485031128 }, { "id": "identity_8010", "image": "DVQA/images/bar_train_00034003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of accuracies of the algorithm number for all the datasets?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 33.625, "loss": 2.306387424468994 }, { "id": "identity_178461", "image": "CLEVR-Math/images/CLEVR_train_027856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.859375, "loss": 2.382918119430542 }, { "id": "identity_181437", "image": "CLEVR-Math/images/CLEVR_train_021495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the small grey cylinder?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.0, "loss": 2.550607919692993 }, { "id": "identity_111988", "image": "TextVQA/images/7b6df76509a1983b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors of insoles are displayed on the wall?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.109375, "loss": 2.1344244480133057 }, { "id": "identity_76740", "image": "TQA/images/question_images/human_system_nervous_6172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What connects the two hemispheres of the cerebrum?" }, { "from": "gpt", "value": "The answer is Corpus callosum" } ], "clip_score": 31.421875, "loss": 1.7730823755264282 }, { "id": "identity_45058", "image": "DocVQA/images/lfgn0226_46.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number of the document?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 17.71875, "loss": 2.2646408081054688 }, { "id": "identity_187050", "image": "PMC-VQA/images/PMC4816297_pone.0152505.g001_17082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of a medical device or foreign object in the image?" }, { "from": "gpt", "value": "The answer is No medical device or foreign object visible" } ], "clip_score": 20.09375, "loss": 2.955719232559204 }, { "id": "identity_219608", "image": "VQA2.0/images/231320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the clock faces?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 26.40625, "loss": 3.8086979389190674 }, { "id": "identity_35095", "image": "VQA-AS/images/8540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Should the children be wearing long sleeves on a sunny day?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.875, "loss": 3.254676103591919 }, { "id": "identity_99942", "image": "IconQA/images/81054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this 3D shape?" }, { "from": "gpt", "value": "The answer is Cube" } ], "clip_score": 34.40625, "loss": 2.657062292098999 }, { "id": "identity_203129", "image": "PMC-VQA/images/PMC8124336_ijms-22-04818-f003_441955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Five arrows" } ], "clip_score": 21.734375, "loss": 3.653669834136963 }, { "id": "identity_277506", "image": "GeoQA+/images/466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, angle A is 50°. The inscribed circle I is tangent to sides BC, CA, and AB at points D, E, and F respectively. What is the degree measure of angle EDF? \n(A) 55°\n(B) 60°\n(C) 65°\n(D) 70°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.1875, "loss": 2.3639073371887207 }, { "id": "identity_327794", "image": "PlotQA/images/146235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the 3rd group of bars?" }, { "from": "gpt", "value": "The answer is Services" } ], "clip_score": 18.015625, "loss": 4.685498237609863 }, { "id": "identity_114951", "image": "Super-CLEVR/images/superCLEVR_new_000639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the vehicle in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.796875, "loss": 2.0235230922698975 }, { "id": "identity_61378", "image": "DocVQA/images/pnbx0223_39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in Surplus available for Appropriation between the years 2009 and 2010?" }, { "from": "gpt", "value": "The answer is 931.1 crores" } ], "clip_score": 21.765625, "loss": 2.1432313919067383 }, { "id": "identity_147411", "image": "DocVQA/images/sllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number written at the bottom right corner of the document?" }, { "from": "gpt", "value": "The answer is 450008" } ], "clip_score": 21.796875, "loss": 2.507030487060547 }, { "id": "identity_197317", "image": "PMC-VQA/images/PMC8425862_advs2784-fig-0007_53973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any cellular structures indicated by both red and black arrows?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.453125, "loss": 2.989269256591797 }, { "id": "identity_148681", "image": "DocVQA/images/khbg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lectures are scheduled for Tuesday, 8 June - AM?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.8359375, "loss": 2.308978319168091 }, { "id": "identity_1317", "image": "TextVQA/images/825db4b292fb8acc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is their team name?" }, { "from": "gpt", "value": "The answer is sipin" } ], "clip_score": 23.453125, "loss": 3.885035753250122 }, { "id": "identity_189875", "image": "A-OKVQA/images/522461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the majority of people in the image wearing helmets?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.046875, "loss": 3.1644508838653564 }, { "id": "identity_105124", "image": "TextVQA/images/a51887558259c0cd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hanging black and white photographs are visible?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.609375, "loss": 2.369680643081665 }, { "id": "identity_90530", "image": "GeoQA+/images/967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that line segments AM and CM bisect angles BAD and BCD respectively, and angle D is 42 degrees, what is the measure of angle AMC?" }, { "from": "gpt", "value": "The answer is 52 degrees" } ], "clip_score": 29.703125, "loss": 2.2447822093963623 }, { "id": "identity_337829", "image": "DocVQA/images/pkpj0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the heading of the page?" }, { "from": "gpt", "value": "The answer is historical background" } ], "clip_score": 22.53125, "loss": 3.506995439529419 }, { "id": "identity_301153", "image": "ScienceQA/images/6644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option is a bird?\nChoices:\n(A) clownfish\n(B) Surinam horned frog\n(C) tiger shark\n(D) brown pelican" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.6875, "loss": 4.249908447265625 }, { "id": "identity_317994", "image": "TQA/images/question_images/human_system_nervous_2956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobe is shown in red?\nChoices:\n(A) Occipital Lobe\n(B) Parietal Lobe\n(C) Temporal Lobe\n(D) Frontal Lobe" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.1875, "loss": 3.7871882915496826 }, { "id": "identity_268518", "image": "GeoQA+/images/2551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, it is known that BE and CF are the two altitudes of triangle ABC, intersecting at point H. If ∠BAC = 50°, what is the measure of ∠BHC? \n(A) 115°\n(B) 120°\n(C) 125°\n(D) 130°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.03125, "loss": 2.3375232219696045 }, { "id": "identity_33690", "image": "TabMWP/images/22616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A vendor who sold bread at farmers' markets counted the number of leftover loaves at the end of each day. According to the table, what was the rate of change between Tuesday and Wednesday?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 37.59375, "loss": 2.805027723312378 }, { "id": "identity_114452", "image": "VQA-AS/images/16150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the chair the person is sitting on?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 24.09375, "loss": 3.1973652839660645 }, { "id": "identity_7499", "image": "TextVQA/images/f382dc4def4feb2e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand of alcohol?" }, { "from": "gpt", "value": "The answer is old rosie" } ], "clip_score": 26.375, "loss": 3.028919219970703 }, { "id": "identity_228862", "image": "TQA/images/question_images/human_system_excretory_6113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ureters are there in the human body, and what is their function?" }, { "from": "gpt", "value": "The answer is 2 ureters, transport urine from kidneys to bladder" } ], "clip_score": 34.4375, "loss": 1.6947828531265259 }, { "id": "identity_13181", "image": "CLEVR-Math/images/CLEVR_train_040330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all blue cubes. Subtract all purple balls. How many cubes are left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.546875, "loss": 2.5839462280273438 }, { "id": "identity_183297", "image": "PMC-VQA/images/PMC6004303_fig-1_329881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"RL\"?" }, { "from": "gpt", "value": "The answer is The function is to increase surface area for attachment and support" } ], "clip_score": 20.296875, "loss": 3.4120030403137207 }, { "id": "identity_288158", "image": "DocVQA/images/frld0217_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the page?" }, { "from": "gpt", "value": "The answer is neurontin 1998 tactical plan" } ], "clip_score": 27.859375, "loss": 2.2325053215026855 }, { "id": "identity_120419", "image": "VQA2.0/images/207455.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wooden crates are stacked on the green truck?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 29.671875, "loss": 4.098194122314453 }, { "id": "identity_86830", "image": "GeoQA+/images/1343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If OA is a radius of the circle and OA⊥BC, what is the length of OC given that the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 33.0625, "loss": 2.0398128032684326 }, { "id": "identity_70559", "image": "ScienceQA/images/7144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible texture or pattern on the surface of the ring in the image?" }, { "from": "gpt", "value": "The answer is No visible texture or pattern" } ], "clip_score": 29.703125, "loss": 3.208617925643921 }, { "id": "identity_275083", "image": "PMC-VQA/images/PMC8618346_viruses-13-02109-f003_121105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by the red spheres in the segmented 3D volume?\nChoices:\n(A) Mature virions\n(B) Immature virions\n(C) Cytoplasm\n(D) Nucleus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.25, "loss": 4.6458611488342285 }, { "id": "identity_135343", "image": "ChartQA/images/multi_col_100982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of accounts on both Tumblr and Twitter?" }, { "from": "gpt", "value": "The answer is 2.9 million" } ], "clip_score": 28.984375, "loss": 2.1944046020507812 }, { "id": "identity_29885", "image": "A-OKVQA/images/91636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who manufactured this motorcycle?\nChoices:\n(A) honda\n(B) harley davidson\n(C) suzuki\n(D) kawasaki" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.890625, "loss": 4.505435466766357 }, { "id": "identity_287190", "image": "PMC-VQA/images/PMC9187638_Fig2_308785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging techniques were utilized in the analysis? \nOptions:\n(A) MRI and CT\n(B) X-ray and CT\n(C) MRI and MRA\n(D) PET and MRA" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.6875, "loss": 4.50714111328125 }, { "id": "identity_88445", "image": "UniGeo/images/1359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠APB = 50°, what is the measure of ∠PAC?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.03125, "loss": 1.7979581356048584 }, { "id": "identity_196749", "image": "A-OKVQA/images/10145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plant is placed next to the TV?" }, { "from": "gpt", "value": "The answer is It is a Dracaena plant" } ], "clip_score": 23.453125, "loss": 2.8503377437591553 }, { "id": "identity_200067", "image": "VQA2.0/images/533494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number written on the side of the white vehicle?" }, { "from": "gpt", "value": "The answer is 11342" } ], "clip_score": 20.203125, "loss": 2.510969400405884 }, { "id": "identity_11225", "image": "AI2D/images/3549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter D represent?\nChoices:\n(A) Golgi Bodies\n(B) Vacuole\n(C) Mitochondria\n(D) Cytoplasm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.34375, "loss": 4.706942081451416 }, { "id": "identity_269838", "image": "PMC-VQA/images/PMC7968257_Fig3_393535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging sequence was utilized to capture the image? \nOptions:\n(A) T1 sequence\n(B) T2 sequence\n(C) DWI sequence\n(D) FLAIR sequence" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.078125, "loss": 3.9261207580566406 }, { "id": "identity_324925", "image": "TabMWP/images/29845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sprinklers are on 4 football fields if each field has 10 sprinklers?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 40.53125, "loss": 1.497150182723999 }, { "id": "identity_302189", "image": "TextVQA/images/2ae2114a4a2f116b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is displayed on this?" }, { "from": "gpt", "value": "The answer is guess" } ], "clip_score": 19.796875, "loss": 3.2330825328826904 }, { "id": "identity_325515", "image": "PMC-VQA/images/PMC7138870_Fig10_146243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the narrowing located in the body?\nChoices:\n(A) Pulmonary artery\n(B) Aortic arch\n(C) Carotid artery\n(D) Renal vein" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.59375, "loss": 4.132625579833984 }, { "id": "identity_17757", "image": "AI2D/images/3059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tube that comes off the kidney to the bladder?\nChoices:\n(A) cortex\n(B) medulla\n(C) pyramid\n(D) ureter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.875, "loss": 3.7101149559020996 }, { "id": "identity_243566", "image": "TextVQA/images/36d8f0024be1a1a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the labels of the first three sections in the top row from left to right?" }, { "from": "gpt", "value": "The answer is ADULT EDUCATION, ADULT EDUCATION, AFARD" } ], "clip_score": 21.46875, "loss": 2.977816581726074 }, { "id": "identity_79424", "image": "GeoQA+/images/4415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the ladder and the ground after it slides down 4m?" }, { "from": "gpt", "value": "The answer is arctan(20/15) degrees" } ], "clip_score": 28.609375, "loss": 2.2945709228515625 }, { "id": "identity_125982", "image": "TextVQA/images/578a0e1a3660985e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather conditions can be inferred from the sky in the image?" }, { "from": "gpt", "value": "The answer is Partly cloudy" } ], "clip_score": 18.578125, "loss": 2.5326123237609863 }, { "id": "identity_108489", "image": "VQA2.0/images/335405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction does the road lead to in the image?" }, { "from": "gpt", "value": "The answer is Towards the horizon" } ], "clip_score": 29.609375, "loss": 3.2836174964904785 }, { "id": "identity_202181", "image": "A-OKVQA/images/434990.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition at the time of the incident?" }, { "from": "gpt", "value": "The answer is Overcast and wet conditions" } ], "clip_score": 20.4375, "loss": 3.843703269958496 }, { "id": "identity_242852", "image": "ScienceQA/images/3529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of the material used to make this canoe, and how do these characteristics contribute to its functionality in water?" }, { "from": "gpt", "value": "The answer is Buoyancy, strength, workability, and durability contribute to functionality" } ], "clip_score": 29.03125, "loss": 3.519580125808716 }, { "id": "identity_62431", "image": "DocVQA/images/qxhc0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number mentioned for underscores?" }, { "from": "gpt", "value": "The answer is P. 139" } ], "clip_score": 15.59375, "loss": 2.8369805812835693 }, { "id": "identity_222784", "image": "PMC-VQA/images/PMC2562460_F6_28478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar at the bottom of the image indicating?" }, { "from": "gpt", "value": "The answer is Reference for size of structures in known units" } ], "clip_score": 21.03125, "loss": 4.599709987640381 }, { "id": "identity_70514", "image": "TQA/images/abc_question_images/seasons_16286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter corresponds to the Autumnal Equinox?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.921875, "loss": 3.003305435180664 }, { "id": "identity_190802", "image": "VizWiz/images/VizWiz_train_00013240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the food chopper have a pulse control feature?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.109375, "loss": 3.1786839962005615 }, { "id": "identity_185433", "image": "PMC-VQA/images/PMC7427980_Fig5_234416.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see the portal vein in this liver MRI?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.875, "loss": 2.667503833770752 }, { "id": "identity_13902", "image": "UniGeo/images/1310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点c是⊙O的直径AB延长线上一点,CD切⊙O于点D,DE为⊙O的弦,若∠AED=60°,⊙O的半径是2.则CD的长()\nChoices:\n(A) 4\n(B) 3\n(C) 2√{3}\n(D) 2√{2}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.703125, "loss": 2.441209077835083 }, { "id": "identity_139992", "image": "DocVQA/images/hmvj0223_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of ROCE?" }, { "from": "gpt", "value": "The answer is 0.77" } ], "clip_score": 18.453125, "loss": 2.3293657302856445 }, { "id": "identity_54883", "image": "FigureQA/images/81961.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Dark Blue line consistently above the Seafoam line throughout the graph?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.609375, "loss": 3.2034192085266113 }, { "id": "identity_113434", "image": "PMC-VQA/images/PMC3114847_pone-0020979-g003_98975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the main structure in Panel B?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.890625, "loss": 3.2609925270080566 }, { "id": "identity_223201", "image": "TextVQA/images/05dc2aba2eb24411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the gloves worn by both fighters?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 27.328125, "loss": 3.0194778442382812 }, { "id": "identity_197490", "image": "TextVQA/images/8994f0a839f29131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the flower in the background?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 9.3828125, "loss": 2.7676432132720947 }, { "id": "identity_184373", "image": "PMC-VQA/images/PMC7408080_brainsci-10-00440-f001_228733.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnified area in the inset image showing?" }, { "from": "gpt", "value": "The answer is Detailed view of a lesion" } ], "clip_score": 24.140625, "loss": 3.468019962310791 }, { "id": "identity_286980", "image": "FigureQA/images/13750.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Green Yellow or Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.953125, "loss": 3.824028491973877 }, { "id": "identity_65861", "image": "TQA/images/question_images/lewis_dot_diagrams_9148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the molecular formula of Acetylene as depicted in the image?" }, { "from": "gpt", "value": "The answer is C2H2" } ], "clip_score": 23.53125, "loss": 1.0951123237609863 }, { "id": "identity_140877", "image": "MapQA/images/map_5504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Texas in the highest value range category?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 24.703125, "loss": 3.248075246810913 }, { "id": "identity_13231", "image": "FigureQA/images/70974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Cornflower have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.171875, "loss": 3.914637327194214 }, { "id": "identity_272598", "image": "TextVQA/images/039f33b5834a956b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand does the beer belong to?" }, { "from": "gpt", "value": "The answer is carlsberg" } ], "clip_score": 33.53125, "loss": 2.9079203605651855 }, { "id": "identity_202208", "image": "PMC-VQA/images/PMC7471697_Fig3_248302.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality was used to obtain this image?" }, { "from": "gpt", "value": "The answer is Computed tomography (CT) scan" } ], "clip_score": 33.1875, "loss": 2.0075767040252686 }, { "id": "identity_2748", "image": "MapQA/images/map_9652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the first symbol in the legend represent the smallest category ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 15.46875, "loss": 3.0352768898010254 }, { "id": "identity_60574", "image": "PlotQA/images/64073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries have a value added in manufacturing of less than 5% in 1965?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 25.046875, "loss": 2.4544172286987305 }, { "id": "identity_126514", "image": "PMC-VQA/images/PMC5917066_phy213668-fig-0001_306054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the red dashed line within the blue outline represent?" }, { "from": "gpt", "value": "The answer is A specific area or feature within the organ" } ], "clip_score": 19.6875, "loss": 3.952166795730591 }, { "id": "identity_55983", "image": "DocVQA/images/yfjd0228_107.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many subdivisions are there for the 25 ml Buret?" }, { "from": "gpt", "value": "The answer is 0.10 ml" } ], "clip_score": 17.25, "loss": 1.4595184326171875 }, { "id": "identity_299371", "image": "DocVQA/images/fgbv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the values provided?" }, { "from": "gpt", "value": "The answer is 492.5" } ], "clip_score": 14.609375, "loss": 2.099159002304077 }, { "id": "identity_54805", "image": "FigureQA/images/37405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Dark Orchid section represent more than a quarter of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 36.53125, "loss": 3.160691738128662 }, { "id": "identity_249031", "image": "VQA2.0/images/537878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beds are in this room, and what other furniture or items can you identify that indicate this is a child's bedroom?" }, { "from": "gpt", "value": "The answer is Three beds, toys, bookshelf, pajamas, playful decor" } ], "clip_score": 32.3125, "loss": 2.8140902519226074 }, { "id": "identity_11084", "image": "Geometry3K/images/1325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the regular polygon. Round to the nearest tenth.\nChoices:\n(A) 31.2\n(B) 62.4\n(C) 124.8\n(D) 144" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.625, "loss": 2.8772387504577637 }, { "id": "identity_85674", "image": "GeoQA+/images/3879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the slant height of the cone is 5, what is the area of the sector of the circle in the unfolded diagram?" }, { "from": "gpt", "value": "The answer is 15π square units" } ], "clip_score": 34.1875, "loss": 1.5315606594085693 }, { "id": "identity_92158", "image": "IconQA/images/88942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the sequence have a pointed tip?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.796875, "loss": 2.4954280853271484 }, { "id": "identity_13278", "image": "VQA2.0/images/282703.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is someone getting ready to do a craft project?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.15625, "loss": 3.1045889854431152 }, { "id": "identity_217556", "image": "A-OKVQA/images/159979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vessels are floating on the water?" }, { "from": "gpt", "value": "The answer is Boats" } ], "clip_score": 20.375, "loss": 2.6776187419891357 }, { "id": "identity_286807", "image": "VQA2.0/images/241554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of tires that can be seen in the picture?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.9375, "loss": 2.3654916286468506 }, { "id": "identity_264370", "image": "DocVQA/images/zyjf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who will discover that NN622 exhibits inhibitory effects on carbonic anhydrase and actos?" }, { "from": "gpt", "value": "The answer is Dr. Odaka" } ], "clip_score": 22.0625, "loss": 3.587925434112549 }, { "id": "identity_141333", "image": "PlotQA/images/65607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of patent applications filed by residents of the United Kingdom in 1983?" }, { "from": "gpt", "value": "The answer is 20000" } ], "clip_score": 39.09375, "loss": 1.891007661819458 }, { "id": "identity_228059", "image": "TQA/images/abc_question_images/skin_cross_section_17583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part is primarily responsible for thermoregulation and also contains the most amount of adipose tissue?" }, { "from": "gpt", "value": "The answer is P" } ], "clip_score": 28.625, "loss": 2.8677151203155518 }, { "id": "identity_228138", "image": "AI2D/images/2977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of the structures located at the top and bottom of the diagram, and what are their respective functions?" }, { "from": "gpt", "value": "The answer is Dorsal horn receives sensory information; ventral horn sends motor signals" } ], "clip_score": 34.46875, "loss": 2.5557169914245605 }, { "id": "identity_168446", "image": "IconQA/images/89801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fish are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.5625, "loss": 1.9923332929611206 }, { "id": "identity_19039", "image": "ScienceQA/images/136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the statement.\nPalladium is ().\nChoices:\n(A) a compound\n(B) an elementary substance" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.640625, "loss": 3.403254747390747 }, { "id": "identity_203751", "image": "PMC-VQA/images/PMC6419362_Fig16_451805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was likely used to capture this image?" }, { "from": "gpt", "value": "The answer is Microscopy" } ], "clip_score": 28.78125, "loss": 2.151055097579956 }, { "id": "identity_84681", "image": "Geometry3K/images/1250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If x = 10, find the length of side EF" }, { "from": "gpt", "value": "The answer is 56" } ], "clip_score": 31.46875, "loss": 3.0750558376312256 }, { "id": "identity_160073", "image": "GeoQA+/images/964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given square ABCD, if AE=3 and CF=1, what is the length of segment OF?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.53125, "loss": 2.6431031227111816 }, { "id": "identity_296586", "image": "DocVQA/images/fgfl0225_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does Camel Snus refer to?" }, { "from": "gpt", "value": "The answer is type of moist snuff tobacco product in a pouch that is spitless." } ], "clip_score": 18.984375, "loss": 1.5675405263900757 }, { "id": "identity_281156", "image": "GeoQA+/images/3712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, to measure the distance between two points A and B located next to a pond, point O is marked on the ground. Midpoints C and D are determined on OA and OB respectively, and CD is measured to be 20m. What is the distance between points A and B?\nChoices:\n(A) 10m\n(B) 20m\n(C) 40m\n(D) 80m" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.34375, "loss": 2.6269614696502686 }, { "id": "identity_32621", "image": "DVQA/images/bar_train_00038226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is the least preferred?" }, { "from": "gpt", "value": "The answer is boss" } ], "clip_score": 29.46875, "loss": 2.630474805831909 }, { "id": "identity_16917", "image": "UniGeo/images/1631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,点D、E分别为边AB、AC上的点,且DE∥BC,若AD=5,BD=10,AE=3,则CE的长为()\nChoices:\n(A) 3\n(B) 6\n(C) 9\n(D) 12" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.453125, "loss": 2.4808175563812256 }, { "id": "identity_78250", "image": "AI2D/images/3608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the passage that connects the nasal cavity to the pharynx?" }, { "from": "gpt", "value": "The answer is Nasal cavity" } ], "clip_score": 31.40625, "loss": 2.033315896987915 }, { "id": "identity_266542", "image": "ScienceQA/images/7203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likelihood that a pea plant resulting from this cross will have round peas?\nChoices:\n(A) 4/4\n(B) 1/4\n(C) 2/4\n(D) 0/4\n(E) 3/4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.515625, "loss": 3.399460554122925 }, { "id": "identity_245673", "image": "Super-CLEVR/images/superCLEVR_new_000443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are either cars in front of the bicycle, objects that are right of the tiny red motorbike, or objects that are behind the bus?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.53125, "loss": 2.619048833847046 }, { "id": "identity_48184", "image": "PlotQA/images/153246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the price value of exports for Ghana greater than 0?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.1875, "loss": 3.3816275596618652 }, { "id": "identity_6804", "image": "TextVQA/images/8055b871e3ce20cf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what drink is advertised in the background?" }, { "from": "gpt", "value": "The answer is red bull" } ], "clip_score": 19.671875, "loss": 2.5263559818267822 }, { "id": "identity_227736", "image": "IconQA/images/24633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, what is the probability that both marbles will be of the same color?" }, { "from": "gpt", "value": "The answer is \\(\\frac{1}{3}\\)" } ], "clip_score": 28.96875, "loss": 2.3313796520233154 }, { "id": "identity_149307", "image": "DocVQA/images/sjbw0217_43.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What services could DesignWrite provide related to meetings?" }, { "from": "gpt", "value": "The answer is Newsletters, booklets, lecture kits, proceeding publications" } ], "clip_score": 25.5, "loss": 3.8603625297546387 }, { "id": "identity_137018", "image": "DVQA/images/bar_train_00177999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the 'truth' bar in the 'night' group?" }, { "from": "gpt", "value": "The answer is 5.9" } ], "clip_score": 23.796875, "loss": 2.598675012588501 }, { "id": "identity_265754", "image": "UniGeo/images/1109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, ABCD is an inscribed quadrilateral in circle O, E is a point on AB, and ∠CBE = 50°. What is the measure of ∠AOC?\nChoices:\n(A) 25°\n(B) 50°\n(C) 100°\n(D) 130°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.6875, "loss": 2.788318634033203 }, { "id": "identity_331785", "image": "VizWiz/images/VizWiz_train_00010803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is unknown" } ], "clip_score": 14.8515625, "loss": 3.9030773639678955 }, { "id": "identity_50562", "image": "DVQA/images/bar_train_00030172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have a \"ward\" value of 5 or more?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.109375, "loss": 2.4326021671295166 }, { "id": "identity_271118", "image": "TQA/images/question_images/velocity_time_graphs_8215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When distance and time are increasing, what is the outcome in terms of speed?\nChoices:\n(A) We are getting faster\n(B) We are getting slower\n(C) We are steady\n(D) We are stationary" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.515625, "loss": 4.202199935913086 }, { "id": "identity_141982", "image": "DocVQA/images/jlpn0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Were any flavor or adhesives rejected this week?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.40625, "loss": 2.640249729156494 }, { "id": "identity_40454", "image": "FigureQA/images/84768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Green Yellow at x-axis label 50?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 27.53125, "loss": 2.850311040878296 }, { "id": "identity_291150", "image": "VizWiz/images/VizWiz_train_00019490.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of this object?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 25.71875, "loss": 3.3892226219177246 }, { "id": "identity_170331", "image": "IconQA/images/32231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares tall is the letter after the transformation?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.796875, "loss": 2.1878063678741455 }, { "id": "identity_196344", "image": "TextVQA/images/55eb940224c400ea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many locations are marked with images on the right side of the map?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.484375, "loss": 2.6075632572174072 }, { "id": "identity_221020", "image": "TextVQA/images/bf0d3b027a972c2d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles have a neck label with a shield and crown design?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.40625, "loss": 2.6673622131347656 }, { "id": "identity_10484", "image": "TextVQA/images/58fce8948886c3b5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: in what year did the brewery begin?" }, { "from": "gpt", "value": "The answer is 1795" } ], "clip_score": 18.296875, "loss": 1.4657495021820068 }, { "id": "identity_260449", "image": "TabMWP/images/37734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average annual rate of change in the per-student budget from 2014 to 2018?" }, { "from": "gpt", "value": "The answer is $1.75" } ], "clip_score": 27.015625, "loss": 2.0159311294555664 }, { "id": "identity_35882", "image": "PMC-VQA/images/PMC4182018_fig3_324274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the hypointensity shown in the image? \nChoices:\n(A) Calcification \n(B) Hemorrhage \n(C) Infection \n(D) Artefact " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 4.624614238739014 }, { "id": "identity_103350", "image": "A-OKVQA/images/446481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible traffic lights are showing a red signal?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 22.421875, "loss": 4.551138401031494 }, { "id": "identity_104300", "image": "VQA2.0/images/393503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 16.546875, "loss": 2.3397514820098877 }, { "id": "identity_137669", "image": "FigureQA/images/26154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph have a title?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.265625, "loss": 3.264755964279175 }, { "id": "identity_248403", "image": "IconQA/images/33889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the positions of the second bear and the third train?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 27.78125, "loss": 2.0257840156555176 }, { "id": "identity_165382", "image": "GeoQA+/images/1566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line l1 is parallel to line l2, and the measure of angle 2 is 75 degrees, what is the measure of the interior angle adjacent to angle 2 within the triangle?" }, { "from": "gpt", "value": "The answer is 105 degrees" } ], "clip_score": 33.84375, "loss": 1.489092469215393 }, { "id": "identity_181069", "image": "IconQA/images/20529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the umbrella is colored yellow?" }, { "from": "gpt", "value": "The answer is 1/2 or 0.5" } ], "clip_score": 35.96875, "loss": 1.3661531209945679 }, { "id": "identity_303533", "image": "FigureQA/images/34765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Blue Violet?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.34375, "loss": 3.689420223236084 }, { "id": "identity_325132", "image": "A-OKVQA/images/297910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What animal are the individuals observing?\nChoices:\n(A) goats\n(B) lambs\n(C) donkeys\n(D) horses" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.0, "loss": 3.7978224754333496 }, { "id": "identity_250356", "image": "PMC-VQA/images/PMC7744209_FIG1_322024.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size and location of the lesion seen in the MRI, and which lobe of the brain is it located in?" }, { "from": "gpt", "value": "The answer is 3.1 x 2.3 cm in the right parietal lobe" } ], "clip_score": 34.90625, "loss": 1.926209568977356 }, { "id": "identity_260332", "image": "TextVQA/images/dddbc043975b3283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the event in days, and in which month and year does it occur?" }, { "from": "gpt", "value": "The answer is 5 days, September 2008" } ], "clip_score": 12.7578125, "loss": 1.750567078590393 }, { "id": "identity_174290", "image": "TabMWP/images/32910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of problems on the tests?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 23.765625, "loss": 3.1602516174316406 }, { "id": "identity_51356", "image": "MapQA/images/map_10779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the maximum number of employees according to the map?" }, { "from": "gpt", "value": "The answer is California" } ], "clip_score": 38.75, "loss": 2.0916008949279785 }, { "id": "identity_195805", "image": "VQA2.0/images/356664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible sections does the train car have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 21.953125, "loss": 4.404234886169434 }, { "id": "identity_66455", "image": "TQA/images/question_images/em_spectrum_6819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the frequency range for microwave radiation in Hz?" }, { "from": "gpt", "value": "The answer is 10^8 to 10^11 Hz" } ], "clip_score": 21.078125, "loss": 0.9755476713180542 }, { "id": "identity_3867", "image": "VQA2.0/images/196723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the black and white kite?" }, { "from": "gpt", "value": "The answer is panda" } ], "clip_score": 29.359375, "loss": 2.67008900642395 }, { "id": "identity_243266", "image": "Geometry3K/images/1408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the area of the triangle" }, { "from": "gpt", "value": "The answer is \\( 18\\sqrt{5} \\)" } ], "clip_score": 34.96875, "loss": 2.7553176879882812 }, { "id": "identity_179894", "image": "IconQA/images/24499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups of dots are there?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.09375, "loss": 2.1520090103149414 }, { "id": "identity_114662", "image": "PMC-VQA/images/PMC4864780_Fig5_29969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the cells within the organism uniform in size?" }, { "from": "gpt", "value": "The answer is Yes, the cells are mostly uniform in size" } ], "clip_score": 20.796875, "loss": 2.4814741611480713 }, { "id": "identity_271313", "image": "TQA/images/question_images/sun_layers_3994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of layers present in the sun?\nChoices:\n(A) 4\n(B) 1\n(C) 2\n(D) 3" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.453125, "loss": 3.211728572845459 }, { "id": "identity_47421", "image": "DocVQA/images/ttyc0003_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which date does the TAR value first reach above 100 mg/cig?" }, { "from": "gpt", "value": "The answer is 8/16/93" } ], "clip_score": 20.140625, "loss": 1.9416866302490234 }, { "id": "identity_51748", "image": "FigureQA/images/4808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the largest portion of the pie chart?" }, { "from": "gpt", "value": "The answer is Khaki" } ], "clip_score": 34.65625, "loss": 3.712400197982788 }, { "id": "identity_166570", "image": "TabMWP/images/15442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students in total were surveyed who do not do volunteer work?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 29.875, "loss": 2.344726085662842 }, { "id": "identity_143551", "image": "DVQA/images/bar_train_00011672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold per item?" }, { "from": "gpt", "value": "The answer is 5.8" } ], "clip_score": 24.71875, "loss": 2.337127208709717 }, { "id": "identity_137013", "image": "ChartQA/images/multi_col_841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more did China import in 2018 compared to Bangladesh in the same year?" }, { "from": "gpt", "value": "The answer is 12,801 million euros" } ], "clip_score": 27.96875, "loss": 2.481316328048706 }, { "id": "identity_16788", "image": "TextVQA/images/ff0ef7dc6d86e204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: is this on espn?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.3125, "loss": 2.9904873371124268 }, { "id": "identity_49204", "image": "FigureQA/images/90355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Gray line intersect with the Gold line?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.515625, "loss": 2.2535340785980225 }, { "id": "identity_296408", "image": "IconQA/images/61785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of dots?\nChoices:\n(A) 8\n(B) 9\n(C) 6\n(D) 2\n(E) 7" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.046875, "loss": 3.570519208908081 }, { "id": "identity_198589", "image": "A-OKVQA/images/50927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the batter's uniform?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.859375, "loss": 3.844769239425659 }, { "id": "identity_128906", "image": "VQA-RAD/images/synpic51212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is shown in this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.65625, "loss": 2.630413293838501 }, { "id": "identity_167683", "image": "TabMWP/images/35793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students have more than 1 pet?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.234375, "loss": 2.659841299057007 }, { "id": "identity_26547", "image": "CLEVR-Math/images/CLEVR_train_014651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all green blocks. How many brown cylinders are left?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.015625, "loss": 2.4069254398345947 }, { "id": "identity_86577", "image": "UniGeo/images/3173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment AF if AD is 6 units long?" }, { "from": "gpt", "value": "The answer is √(97/9)" } ], "clip_score": 26.953125, "loss": 3.0440070629119873 }, { "id": "identity_49908", "image": "DocVQA/images/txdw0217_44.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the table presented in the image?" }, { "from": "gpt", "value": "The answer is Effects of Various Treatments on Vasomotor Symptoms" } ], "clip_score": 17.640625, "loss": 2.6768205165863037 }, { "id": "identity_243381", "image": "DocVQA/images/nznm0227_82.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many presentations are scheduled between 1500 and 1700 on Thursday, August 9?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 14.9375, "loss": 2.7663323879241943 }, { "id": "identity_226505", "image": "DVQA/images/bar_train_00112250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many bars are there in total across all groups?" }, { "from": "gpt", "value": "The answer is 21" } ], "clip_score": 16.9375, "loss": 2.2972166538238525 }, { "id": "identity_123683", "image": "PMC-VQA/images/PMC5250663_Fig1_116763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the cerebellum in this patient?" }, { "from": "gpt", "value": "The answer is Chiari malformation" } ], "clip_score": 32.625, "loss": 3.5892903804779053 }, { "id": "identity_248694", "image": "GeoQA+/images/2332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在⊙O中,弦AB、CD相交于点E,∠A=40°,∠B=30°,若∠C=50°,则∠AED的度数为多少?" }, { "from": "gpt", "value": "The answer is 115°" } ], "clip_score": 34.28125, "loss": 2.2182071208953857 }, { "id": "identity_127501", "image": "TextVQA/images/e684c27b650e0154.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many holes are there in the film strip border of the image?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 20.84375, "loss": 2.23742413520813 }, { "id": "identity_53530", "image": "DVQA/images/bar_train_00105987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more people preferred the object step in the jaw category compared to the combat category?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 32.375, "loss": 2.3221805095672607 }, { "id": "identity_334783", "image": "A-OKVQA/images/411417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of item is hanging on this motorcycle?\nChoices:\n(A) air freshener\n(B) groceries\n(C) parking permit\n(D) necklace" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.109375, "loss": 4.009081840515137 }, { "id": "identity_288521", "image": "ScienceQA/images/17200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is used to make this chair?\nChoices:\n(A) wood\n(B) porcelain" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.6875, "loss": 4.194432258605957 }, { "id": "identity_93001", "image": "TabMWP/images/467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the parachuting adventure package price increases by 10%, how much will it cost?" }, { "from": "gpt", "value": "The answer is $401.50" } ], "clip_score": 37.25, "loss": 1.8120429515838623 }, { "id": "identity_289157", "image": "A-OKVQA/images/140068.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the man wearing a glove?\nChoices:\n(A) fashion\n(B) warmth\n(C) to catch\n(D) health" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.796875, "loss": 4.574937343597412 }, { "id": "identity_141526", "image": "DocVQA/images/xpcg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What university is R. A. Stallones, M.D. associated with?" }, { "from": "gpt", "value": "The answer is University of California" } ], "clip_score": 19.015625, "loss": 1.7536849975585938 }, { "id": "identity_188412", "image": "PMC-VQA/images/PMC2828401_F4_57868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the yellow arrow in the small grade 1 image?" }, { "from": "gpt", "value": "The answer is The knee joint space" } ], "clip_score": 28.828125, "loss": 4.173927307128906 }, { "id": "identity_222600", "image": "VizWiz/images/VizWiz_train_00015256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of meal is the Marie Callendar dinner as indicated on the packaging?" }, { "from": "gpt", "value": "The answer is Dinner" } ], "clip_score": 21.453125, "loss": 2.9656708240509033 }, { "id": "identity_142756", "image": "DVQA/images/bar_train_00010402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the 'route' category in the 'speed' group?" }, { "from": "gpt", "value": "The answer is -5" } ], "clip_score": 21.609375, "loss": 3.0268747806549072 }, { "id": "identity_221253", "image": "VizWiz/images/VizWiz_train_00017750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything inside the trash can?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.6875, "loss": 2.808518648147583 }, { "id": "identity_259580", "image": "A-OKVQA/images/145447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the train car that is directly behind the red train car in the center of the three cars?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 25.46875, "loss": 3.7863411903381348 }, { "id": "identity_120283", "image": "PMC-VQA/images/PMC7909842_medicina-57-00087-f005_375248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the spine in this image?" }, { "from": "gpt", "value": "The answer is Lateral view" } ], "clip_score": 33.15625, "loss": 3.710078001022339 }, { "id": "identity_186908", "image": "TextVQA/images/b245e1f40adf7a35.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the goalkeeper's uniform?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 19.40625, "loss": 3.6802432537078857 }, { "id": "identity_39284", "image": "FigureQA/images/30756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have an equal distribution of portions among all colors?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 35.75, "loss": 3.4963183403015137 }, { "id": "identity_77673", "image": "TQA/images/abc_question_images/nuclear_energy_18110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many neutrons are emitted during the fission process at point 'X'?" }, { "from": "gpt", "value": "The answer is Three neutrons are emitted" } ], "clip_score": 27.265625, "loss": 2.6466360092163086 }, { "id": "identity_186867", "image": "A-OKVQA/images/61452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the side of the wagon?" }, { "from": "gpt", "value": "The answer is FULLERS" } ], "clip_score": 25.21875, "loss": 3.789010524749756 }, { "id": "identity_53408", "image": "DocVQA/images/xhfg0227_39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the total expenditure in 1960 was from public (Gov't) support?" }, { "from": "gpt", "value": "The answer is 17%" } ], "clip_score": 24.5, "loss": 1.7097878456115723 }, { "id": "identity_2410", "image": "VQA-AS/images/2859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do all of these people know each other?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.78125, "loss": 3.1128017902374268 }, { "id": "identity_99972", "image": "TabMWP/images/16952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of baseballs hit over the week?" }, { "from": "gpt", "value": "The answer is 471" } ], "clip_score": 36.15625, "loss": 2.210301399230957 }, { "id": "identity_146212", "image": "DocVQA/images/nrmg0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the wall type for Room 342 as per the room schedule?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 22.6875, "loss": 2.965789318084717 }, { "id": "identity_285770", "image": "DocVQA/images/qqlb0228_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which species has the highest absolute lethal dose?" }, { "from": "gpt", "value": "The answer is mice" } ], "clip_score": 16.890625, "loss": 2.4848320484161377 }, { "id": "identity_295386", "image": "VQA2.0/images/55517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it a cold day?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.109375, "loss": 3.2124359607696533 }, { "id": "identity_57875", "image": "FigureQA/images/78008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dots represent the Magenta color on the graph?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 24.859375, "loss": 1.977880835533142 }, { "id": "identity_257687", "image": "PMC-VQA/images/PMC7324723_F3_201814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of Axial FLAIR imaging, and how do they differentiate it from T1 weighted imaging, Diffusion weighted imaging, and Sagittal T2 weighted imaging?" }, { "from": "gpt", "value": "The answer is Suppresses CSF, highlights lesions, horizontal plane" } ], "clip_score": 31.03125, "loss": 3.897634267807007 }, { "id": "identity_93069", "image": "IconQA/images/37479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fifth image in the sequence?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 17.71875, "loss": 2.6577489376068115 }, { "id": "identity_127126", "image": "VQA2.0/images/158567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the kite closest to the ground on the left side of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.609375, "loss": 3.3382275104522705 }, { "id": "identity_36906", "image": "PMC-VQA/images/PMC6721794_fig2s1_32467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What region do the boxes outline in the image? \nChoices:\n(A) Corpus Callosum \n(B) Neuronal body \n(C) Hypothalamus \n(D) NPFM neurons " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.03125, "loss": 3.736668348312378 }, { "id": "identity_66813", "image": "ScienceQA/images/3437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is a decomposer in this ecosystem?" }, { "from": "gpt", "value": "The answer is Boletus fungus" } ], "clip_score": 24.765625, "loss": 3.297424793243408 }, { "id": "identity_68962", "image": "TQA/images/question_images/hydrocarbons_8094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many branches does the neopentane molecule have?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.25, "loss": 2.2172040939331055 }, { "id": "identity_94546", "image": "CLEVR-Math/images/CLEVR_train_062279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cubes?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.84375, "loss": 2.6640098094940186 }, { "id": "identity_30687", "image": "VizWiz/images/VizWiz_train_00016301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this perfume?" }, { "from": "gpt", "value": "The answer is jordache women" } ], "clip_score": 34.84375, "loss": 3.62585711479187 }, { "id": "identity_87534", "image": "UniGeo/images/3241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the length of AE to the length of EC?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 30.421875, "loss": 1.553220272064209 }, { "id": "identity_128642", "image": "Super-CLEVR/images/superCLEVR_new_001534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the jet's wings that is closest to the big yellow regular bus?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.171875, "loss": 3.990208864212036 }, { "id": "identity_174792", "image": "IconQA/images/18031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares are there in total?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 25.96875, "loss": 1.977669358253479 }, { "id": "identity_108167", "image": "VizWiz/images/VizWiz_train_00001572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the pocket knife handle?" }, { "from": "gpt", "value": "The answer is Ribbed or Grooved" } ], "clip_score": 29.515625, "loss": 3.171584129333496 }, { "id": "identity_57819", "image": "PlotQA/images/105112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of GDP claimed by the Central government in the year 2002?" }, { "from": "gpt", "value": "The answer is 30%" } ], "clip_score": 29.5, "loss": 1.9438554048538208 }, { "id": "identity_253271", "image": "Super-CLEVR/images/superCLEVR_new_001377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of metallic objects in the image, and how many of them are larger than the tiny green metal object?" }, { "from": "gpt", "value": "The answer is 7 metallic objects in total, 5 larger than the tiny green metal object" } ], "clip_score": 22.578125, "loss": 1.63105309009552 }, { "id": "identity_221156", "image": "PMC-VQA/images/PMC6305567_Fig2_414831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we identify any other abnormalities in the T2-weighted MRI image besides the choroidal tumor?" }, { "from": "gpt", "value": "The answer is No other abnormalities identified" } ], "clip_score": 34.0, "loss": 3.5843544006347656 }, { "id": "identity_272228", "image": "CLEVR-Math/images/CLEVR_train_023673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the cylinders, what is the total number of objects remaining?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.25, "loss": 2.4091997146606445 }, { "id": "identity_211289", "image": "VQA2.0/images/146151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is likely taking place?" }, { "from": "gpt", "value": "The answer is A bike show or exhibition" } ], "clip_score": 28.34375, "loss": 2.682150363922119 }, { "id": "identity_283609", "image": "PMC-VQA/images/PMC8044440_f3_417269.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of image was utilized to identify the existence of disease?\nOptions:\n(A) T2-weighted images\n(B) T1-weighted images\n(C) Proton Density-weighted images\n(D) Diffusion-weighted images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.734375, "loss": 4.057406425476074 }, { "id": "identity_280182", "image": "GEOS/images/015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which statement is not always true about the construction shown in the diagram, where a perpendicular line PX is constructed from point P to line l, intersecting at points A and B, and further intersecting line PX at point C?\nChoices:\n(A) PA = PB\n(B) AX = BX\n(C) PX = CX\n(D) AC = BC" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.890625, "loss": 3.611804246902466 }, { "id": "identity_138043", "image": "FigureQA/images/65151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a color that does not have any data points below the y-axis value of 50?" }, { "from": "gpt", "value": "The answer is Yes, Hot Pink" } ], "clip_score": 28.953125, "loss": 2.579035520553589 }, { "id": "identity_290871", "image": "CLEVR-Math/images/CLEVR_train_003373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all tiny blocks and small gray metallic objects, how many objects remain?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.15625, "loss": 2.513307809829712 }, { "id": "identity_298653", "image": "ScienceQA/images/4415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlight among the following options: Cuba, the Dominican Republic, Haiti, or Dominica?\nChoices:\n(A) Cuba\n(B) the Dominican Republic\n(C) Haiti\n(D) Dominica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.296875, "loss": 4.306623935699463 }, { "id": "identity_238877", "image": "DocVQA/images/nrdd0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the organization mentioned in the document, and who is the sender of the document?" }, { "from": "gpt", "value": "The answer is 2101 Constitution Avenue, Washington, D.C. 20418, Paul E. Johnson" } ], "clip_score": 22.234375, "loss": 1.4392170906066895 }, { "id": "identity_47949", "image": "ChartQA/images/multi_col_100187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the average math score for Aboriginal females in Nunavut greater than 200?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.25, "loss": 3.002703905105591 }, { "id": "identity_40785", "image": "DVQA/images/bar_train_00125935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the \"camera\" category have a higher total value than the \"shape\" category?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.65625, "loss": 3.2199676036834717 }, { "id": "identity_109175", "image": "TextVQA/images/23bfdbe38957354f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the product?" }, { "from": "gpt", "value": "The answer is SERO" } ], "clip_score": 27.921875, "loss": 3.3049423694610596 }, { "id": "identity_295213", "image": "UniGeo/images/124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, where angle C is 90 degrees, AC is equal to BC, AD bisects angle CAB and intersects BC at point D, DE is perpendicular to AB with E as the foot, and AB measures 6cm. What is the perimeter of triangle DEB? \n(A) 4cm \n(B) 6cm \n(C) 8cm \n(D) 10cm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.28125, "loss": 2.426058053970337 }, { "id": "identity_11378", "image": "Geometry3K/images/1342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find m \\widehat B C A.\nChoices:\n(A) 30\n(B) 45\n(C) 55\n(D) 180" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.65625, "loss": 2.5725998878479004 }, { "id": "identity_30467", "image": "TabMWP/images/33657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The aquarium's records showed how many tickets were sold in the past 4 days. How many tickets did the aquarium sell in all on Monday and Wednesday?" }, { "from": "gpt", "value": "The answer is 743" } ], "clip_score": 30.953125, "loss": 2.434800624847412 }, { "id": "identity_272406", "image": "GeoQA+/images/1231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the parallelograms ABCD and BEFG, where AB = BC and BG = BE, points A, B, E are collinear, P is the midpoint of segment DF. If ∠DCB = ∠GEF = 120°, what is the value of PG/PC?\n(A) √{2}\n(B) √{3}\n(C) √{2}/2\n(D) √{3}/3" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.71875, "loss": 2.683232307434082 }, { "id": "identity_120037", "image": "TextVQA/images/abbc4bd5a7b96200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of support is being offered to dealers according to the advertisement?" }, { "from": "gpt", "value": "The answer is Sales Demonstration and Dealer Help Service" } ], "clip_score": 22.53125, "loss": 3.3795833587646484 }, { "id": "identity_100906", "image": "TabMWP/images/9444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pairs of shorts in 6 laundry baskets?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 24.640625, "loss": 1.724496603012085 }, { "id": "identity_37093", "image": "MapQA/images/map_5614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Alabama ?" }, { "from": "gpt", "value": "The answer is 524-1460" } ], "clip_score": 25.9375, "loss": 2.555769205093384 }, { "id": "identity_74449", "image": "TQA/images/question_images/cycle_nitrogen_6713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is depicted by the large purple arrow going from the atmosphere to the ground?" }, { "from": "gpt", "value": "The answer is Atmospheric fixation" } ], "clip_score": 24.265625, "loss": 2.2456271648406982 }, { "id": "identity_148543", "image": "DocVQA/images/qznm0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount allocated for supplies over the three years?" }, { "from": "gpt", "value": "The answer is $3,000" } ], "clip_score": 21.8125, "loss": 1.7115215063095093 }, { "id": "identity_332278", "image": "GeoQA+/images/2869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given the diagram, with AB as the diameter of circle O, and points C and D on the same side of AB on the circle, if ∠ACD = 130°, then ∠BAD is:\n\nChoices:\n(A) 50°\n(B) 40°\n(C) 35°\n(D) 25°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 38.53125, "loss": 2.2322380542755127 }, { "id": "identity_197124", "image": "PMC-VQA/images/PMC7017548_Fig5_116604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of a previous surgical intervention in the lumbar region?" }, { "from": "gpt", "value": "The answer is Yes, spinal hardware indicates previous surgery" } ], "clip_score": 27.6875, "loss": 3.0821011066436768 }, { "id": "identity_50876", "image": "DocVQA/images/xkmd0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the event?" }, { "from": "gpt", "value": "The answer is 3/12/98" } ], "clip_score": 17.8125, "loss": 2.176055669784546 }, { "id": "identity_319332", "image": "IconQA/images/4404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cubes tall is this?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.53125, "loss": 1.8518880605697632 }, { "id": "identity_90238", "image": "UniGeo/images/1470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of segment AB is tripled and the length of segment BC is doubled, what will be the new length of segment DF?" }, { "from": "gpt", "value": "The answer is 8/3" } ], "clip_score": 30.03125, "loss": 1.90827476978302 }, { "id": "identity_151497", "image": "ScienceQA/images/2463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If F represents the dominant allele for black fur and f represents the recessive allele for brown fur, what are the possible fur colors for the offspring?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 30.140625, "loss": 4.801215171813965 }, { "id": "identity_210549", "image": "PMC-VQA/images/PMC8145651_jof-07-00346-f001_448978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lung fields clear of any consolidation in the image?" }, { "from": "gpt", "value": "The answer is Lung fields are clear" } ], "clip_score": 30.546875, "loss": 3.2800776958465576 }, { "id": "identity_240312", "image": "IconQA/images/69253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the second boat in the sequence?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 17.609375, "loss": 2.847302198410034 }, { "id": "identity_107638", "image": "PMC-VQA/images/PMC3744869_F5_225086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any marking on the image that is not green or light blue?" }, { "from": "gpt", "value": "The answer is Yes, there is a red marking" } ], "clip_score": 26.046875, "loss": 2.038180112838745 }, { "id": "identity_12414", "image": "TextVQA/images/27b57ca3f09f5272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what year was guinness established?" }, { "from": "gpt", "value": "The answer is 1759" } ], "clip_score": 30.046875, "loss": 1.0698400735855103 }, { "id": "identity_236419", "image": "AI2D/images/2002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organisms rely on other organisms that eat grass or plants for food?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.09375, "loss": 2.446201801300049 }, { "id": "identity_275426", "image": "A-OKVQA/images/221803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On the clock tower, how many clock faces are visible? \n(A) two\n(B) four\n(C) one\n(D) three" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.3125, "loss": 3.700562000274658 }, { "id": "identity_235428", "image": "DVQA/images/bar_train_00149197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the design element appear across all categories?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.46875, "loss": 2.175311803817749 }, { "id": "identity_229574", "image": "FigureQA/images/51886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the percentages of the two largest segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 70%" } ], "clip_score": 32.375, "loss": 1.6815214157104492 }, { "id": "identity_268418", "image": "DVQA/images/bar_train_00154141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many algorithms is the accuracy less than 6 in at least one dataset?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 34.8125, "loss": 3.609989881515503 }, { "id": "identity_250022", "image": "TabMWP/images/20180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bags that have more than 50 red buttons?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 20.859375, "loss": 2.120704174041748 }, { "id": "identity_261158", "image": "AI2D/images/3352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the labels for all parts of the insect's head in the image, starting from the topmost part to the bottommost part" }, { "from": "gpt", "value": "The answer is antennae, eyes, mouth" } ], "clip_score": 34.75, "loss": 1.9935826063156128 }, { "id": "identity_73375", "image": "ScienceQA/images/233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is New York" } ], "clip_score": 29.375, "loss": 2.942756414413452 }, { "id": "identity_129555", "image": "TextVQA/images/aeb3e59991e7351e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen in the image?" }, { "from": "gpt", "value": "The answer is An integer number representing the count of people" } ], "clip_score": 7.1328125, "loss": 3.562791109085083 }, { "id": "identity_138579", "image": "DVQA/images/bar_train_00150154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between \"beauty\" and \"corps\" in the \"summer\" category?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 30.484375, "loss": 2.287200450897217 }, { "id": "identity_80717", "image": "Geometry3K/images/738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle JML?" }, { "from": "gpt", "value": "The answer is 145" } ], "clip_score": 31.90625, "loss": 2.4946975708007812 }, { "id": "identity_95443", "image": "TabMWP/images/7362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of the three-legged race and the obstacle course events combined?" }, { "from": "gpt", "value": "The answer is 1 hour 10 minutes" } ], "clip_score": 29.203125, "loss": 1.881072998046875 }, { "id": "identity_180950", "image": "TabMWP/images/12273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students are in 3 train cars?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 30.46875, "loss": 1.683176040649414 }, { "id": "identity_166210", "image": "TabMWP/images/6622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clubs have fewer than 30 members?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.34375, "loss": 2.3873655796051025 }, { "id": "identity_229689", "image": "DocVQA/images/ymph0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount due for the invoice, including an 8% interest charge if the payment is made 60 days after the invoice date?" }, { "from": "gpt", "value": "The answer is $224.92" } ], "clip_score": 26.421875, "loss": 2.3426332473754883 }, { "id": "identity_323388", "image": "MapQA/images/map_6674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any missing data on the map?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.921875, "loss": 2.75205135345459 }, { "id": "identity_330069", "image": "PMC-VQA/images/PMC3857525_F1_250494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what convention are these images displayed?\nChoices:\n(A) Radiological \n(B) Neurological \n(C) Inverted \n(D) Oblique" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.625, "loss": 4.803289890289307 }, { "id": "identity_267863", "image": "A-OKVQA/images/474268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is currently located on the floor among the following options: antelope, surfboard, pumpkin, or egg sandwich?\nChoices:\n(A) antelope\n(B) surfboard\n(C) pumpkin\n(D) egg sandwich" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.5, "loss": 4.167606353759766 }, { "id": "identity_5480", "image": "AI2D/images/2106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Above is an example of a food chain. What would cause the number of mice to increase?\nChoices:\n(A) More hawks\n(B) More birds\n(C) More insects\n(D) More plants with seeds" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.125, "loss": 3.384495496749878 }, { "id": "identity_114107", "image": "VQA-AS/images/1845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the picnic blanket?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 27.953125, "loss": 2.65798020362854 }, { "id": "identity_170696", "image": "TabMWP/images/20180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags had more than 50 red buttons?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.0, "loss": 2.23095440864563 }, { "id": "identity_172719", "image": "TabMWP/images/30060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Evelyn chooses to add an upgrade and buy a used phone, what will be the total change in price?" }, { "from": "gpt", "value": "The answer is -$15" } ], "clip_score": 32.90625, "loss": 2.585951805114746 }, { "id": "identity_252144", "image": "PMC-VQA/images/PMC7188759_F2_160292.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure indicated by the black arrow and describe its anatomical significance in relation to the external sheath of the rectus abdominis and the lateral edges of the linea alba" }, { "from": "gpt", "value": "The answer is Lateral edges of the linea alba, boundary for abdominal muscle attachment" } ], "clip_score": 31.109375, "loss": 2.708200454711914 }, { "id": "identity_317829", "image": "PlotQA/images/70231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label or title of this axis?" }, { "from": "gpt", "value": "The answer is % of children who received DPT vaccine" } ], "clip_score": 37.65625, "loss": 1.8921372890472412 }, { "id": "identity_41135", "image": "DocVQA/images/rfpj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is John R. Alm" } ], "clip_score": 18.71875, "loss": 3.6627535820007324 }, { "id": "identity_86681", "image": "UniGeo/images/1426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle OAC?" }, { "from": "gpt", "value": "The answer is 4 + sqrt(8)" } ], "clip_score": 30.8125, "loss": 2.09625506401062 }, { "id": "identity_131472", "image": "A-OKVQA/images/67456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the airplane currently on the ground or in the air?" }, { "from": "gpt", "value": "The answer is On the ground" } ], "clip_score": 21.421875, "loss": 2.2935445308685303 }, { "id": "identity_81294", "image": "GeoQA+/images/1810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AD is parallel to BC, and angle D equals angle ABC, and angle DEH equals 98 degrees, what is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 82 degrees" } ], "clip_score": 32.375, "loss": 2.1440322399139404 }, { "id": "identity_269762", "image": "GeoQA+/images/4663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, in quadrilateral ABCD, AB = 4, BC = 6, and the height AE on side BC = 2. What is the length of the height AF on side DC?\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.9375, "loss": 2.2446672916412354 }, { "id": "identity_51012", "image": "FigureQA/images/78594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value of the Green data points?" }, { "from": "gpt", "value": "The answer is 95" } ], "clip_score": 28.671875, "loss": 2.5444676876068115 }, { "id": "identity_79011", "image": "Geometry3K/images/564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle KJH?" }, { "from": "gpt", "value": "The answer is 57 degrees" } ], "clip_score": 32.75, "loss": 2.2791130542755127 }, { "id": "identity_219671", "image": "A-OKVQA/images/352288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the kites?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.625, "loss": 3.885857343673706 }, { "id": "identity_84570", "image": "UniGeo/images/525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the volume of the cone formed by the sector with a central angle of 120° and a radius of 9 cm?" }, { "from": "gpt", "value": "The answer is 18√2π cm³" } ], "clip_score": 16.84375, "loss": 2.4313712120056152 }, { "id": "identity_50209", "image": "FigureQA/images/99251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest peak value and at what x-axis value does it occur?" }, { "from": "gpt", "value": "The answer is Bubblegum at x-axis 10" } ], "clip_score": 27.296875, "loss": 2.201910972595215 }, { "id": "identity_246489", "image": "IconQA/images/33270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you determine the number of faces, edges, and vertices of this shape, and verify if Euler's formula (V - E + F = 2) holds for this polyhedron?" }, { "from": "gpt", "value": "The answer is Euler's formula holds" } ], "clip_score": 30.4375, "loss": 2.790295124053955 }, { "id": "identity_43637", "image": "DVQA/images/bar_train_00012056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value for the category 'hill' across all groups?" }, { "from": "gpt", "value": "The answer is 220" } ], "clip_score": 20.21875, "loss": 2.301865339279175 }, { "id": "identity_68458", "image": "AI2D/images/268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the primary consumer that feeds on grass in this desert food web" }, { "from": "gpt", "value": "The answer is Deer" } ], "clip_score": 36.5625, "loss": 2.8114588260650635 }, { "id": "identity_222558", "image": "A-OKVQA/images/38934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many houses are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.875, "loss": 4.610337257385254 }, { "id": "identity_75778", "image": "AI2D/images/2137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many final stages are possible for a star after the Supernova phase?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.796875, "loss": 2.6139049530029297 }, { "id": "identity_86298", "image": "Geometry3K/images/359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of \\angle 3 in the figure" }, { "from": "gpt", "value": "The answer is 100°" } ], "clip_score": 35.4375, "loss": 1.4932020902633667 }, { "id": "identity_164288", "image": "GeoQA+/images/430.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side BC in the right triangle ABC?" }, { "from": "gpt", "value": "The answer is The length of BC is 20" } ], "clip_score": 30.09375, "loss": 3.167316436767578 }, { "id": "identity_77751", "image": "AI2D/images/762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During which phase of the moon would it be visible in the western sky in the morning?" }, { "from": "gpt", "value": "The answer is Full Moon" } ], "clip_score": 32.0, "loss": 2.355609893798828 }, { "id": "identity_154188", "image": "TQA/images/abc_question_images/cycle_water_14915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'R' represent in the image?" }, { "from": "gpt", "value": "The answer is Wind direction" } ], "clip_score": 27.265625, "loss": 3.179316759109497 }, { "id": "identity_23753", "image": "A-OKVQA/images/329664.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the man in the square hold do here?\nChoices:\n(A) films\n(B) keeps score\n(C) hides\n(D) sleeps" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.2578125, "loss": 4.084146976470947 }, { "id": "identity_175693", "image": "TabMWP/images/22111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode number of offices per floor?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.640625, "loss": 2.8630211353302 }, { "id": "identity_334296", "image": "TabMWP/images/7563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At Newport Consulting, the head of human resources examined how the number of employees with health care benefits varied in response to policy changes. According to the table, what was the rate of change between these two years?" }, { "from": "gpt", "value": "The answer is -13" } ], "clip_score": 36.5, "loss": 3.695434808731079 }, { "id": "identity_93817", "image": "IconQA/images/85440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much longer is the bean's body compared to its stem to the nearest quarter inch?" }, { "from": "gpt", "value": "The answer is 4.75 inches" } ], "clip_score": 34.71875, "loss": 1.7302677631378174 }, { "id": "identity_32628", "image": "TabMWP/images/21778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The manager of a party supply store researched how many balloons it sold in the past 6 days. What is the mode of the numbers?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 36.59375, "loss": 2.272564649581909 }, { "id": "identity_218555", "image": "TextVQA/images/1ee6fb55aa254fb5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the website listed at the bottom of the poster?" }, { "from": "gpt", "value": "The answer is www.katxiporreta.com" } ], "clip_score": 22.484375, "loss": 3.1466517448425293 }, { "id": "identity_212367", "image": "A-OKVQA/images/98493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is displayed on the clock of the building?" }, { "from": "gpt", "value": "The answer is 11:55" } ], "clip_score": 17.375, "loss": 1.5959028005599976 }, { "id": "identity_147140", "image": "DocVQA/images/jqxf0227_24.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the category code for item number 63?" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 16.703125, "loss": 3.4173903465270996 }, { "id": "identity_73239", "image": "AI2D/images/2223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first stage of the Wild Atlantic salmon's life cycle as shown in the image?" }, { "from": "gpt", "value": "The answer is Eggs" } ], "clip_score": 34.09375, "loss": 2.216597080230713 }, { "id": "identity_215011", "image": "VQA2.0/images/221711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible herbs on this pizza?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.03125, "loss": 3.019012212753296 }, { "id": "identity_213104", "image": "VQA-AS/images/19051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of furniture is the person sitting on?" }, { "from": "gpt", "value": "The answer is A sofa" } ], "clip_score": 30.28125, "loss": 3.6874871253967285 }, { "id": "identity_183640", "image": "TextVQA/images/62579859904492f0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many American flags are displayed in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.21875, "loss": 2.488513708114624 }, { "id": "identity_94795", "image": "TabMWP/images/13444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in cost between the most expensive and least expensive key chains?" }, { "from": "gpt", "value": "The answer is $0.25" } ], "clip_score": 32.6875, "loss": 2.191546678543091 }, { "id": "identity_125889", "image": "A-OKVQA/images/324999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of aircraft is displayed in the upper left corner?" }, { "from": "gpt", "value": "The answer is A propeller-driven airplane" } ], "clip_score": 18.5, "loss": 2.268049478530884 }, { "id": "identity_304000", "image": "AI2D/images/188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the label C indicate?\nChoices:\n(A) fault plane\n(B) foot wall\n(C) hanging wall\n(D) fault scarp" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.234375, "loss": 4.814967155456543 }, { "id": "identity_151664", "image": "ScienceQA/images/3590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which substance is likely to have the highest density?" }, { "from": "gpt", "value": "The answer is Honey" } ], "clip_score": 11.4609375, "loss": 2.5534603595733643 }, { "id": "identity_99115", "image": "TabMWP/images/6579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bags had fewer than 10 red buttons?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.28125, "loss": 2.3215456008911133 }, { "id": "identity_77708", "image": "TQA/images/question_images/periodic_table_8158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element in period 2 has the smallest atomic mass?" }, { "from": "gpt", "value": "The answer is Lithium (Li)" } ], "clip_score": 28.40625, "loss": 1.5136046409606934 }, { "id": "identity_142566", "image": "DocVQA/images/fnvj0223_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the designation of Ravinder Kumar?" }, { "from": "gpt", "value": "The answer is Chairperson & Managing Director" } ], "clip_score": 21.4375, "loss": 2.6298270225524902 }, { "id": "identity_296148", "image": "DocVQA/images/qqdd0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On August 30, what is the registration time specified?" }, { "from": "gpt", "value": "The answer is 8:00 A.M. to 10:00 A.M." } ], "clip_score": 17.890625, "loss": 0.8487610220909119 }, { "id": "identity_139031", "image": "DVQA/images/bar_train_00125110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest number of units sold in the 'actor' store?" }, { "from": "gpt", "value": "The answer is threat" } ], "clip_score": 27.453125, "loss": 4.673785209655762 }, { "id": "identity_114305", "image": "Super-CLEVR/images/superCLEVR_new_001148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more metallic or matte objects in the image?" }, { "from": "gpt", "value": "The answer is More metallic objects" } ], "clip_score": 21.0, "loss": 2.136132001876831 }, { "id": "identity_152138", "image": "ScienceQA/images/16697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnetic polarity at the bottom of the lower magnet?" }, { "from": "gpt", "value": "The answer is North pole" } ], "clip_score": 22.28125, "loss": 3.797100067138672 }, { "id": "identity_73854", "image": "AI2D/images/1168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the jelly-like substance that fills the interior of the cell?" }, { "from": "gpt", "value": "The answer is Cytoplasm" } ], "clip_score": 31.421875, "loss": 2.2095775604248047 }, { "id": "identity_15310", "image": "DocVQA/images/qfdw0004_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the job title of Charles A. Blixt?" }, { "from": "gpt", "value": "The answer is Sr. Vice President & General Counsel" } ], "clip_score": 23.046875, "loss": 2.739664077758789 }, { "id": "identity_215504", "image": "VQA2.0/images/113315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is being projected on the screen?" }, { "from": "gpt", "value": "The answer is An underwater scene or marine life" } ], "clip_score": 15.7734375, "loss": 2.4837379455566406 }, { "id": "identity_95363", "image": "TabMWP/images/25544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price difference between the most expensive and the least expensive mushrooms per pound?" }, { "from": "gpt", "value": "The answer is $20" } ], "clip_score": 34.09375, "loss": 2.428767681121826 }, { "id": "identity_118042", "image": "A-OKVQA/images/293895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is shown in the image?" }, { "from": "gpt", "value": "The answer is A store or shop" } ], "clip_score": 18.765625, "loss": 3.456683397293091 }, { "id": "identity_70216", "image": "AI2D/images/4108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the roots in the image?" }, { "from": "gpt", "value": "The answer is Absorbing water" } ], "clip_score": 26.34375, "loss": 3.0823028087615967 }, { "id": "identity_170403", "image": "IconQA/images/90852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is used in a game where landing on pink wins a point, how many points are expected to be won in 150 spins?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 29.15625, "loss": 2.2134501934051514 }, { "id": "identity_178221", "image": "IconQA/images/19966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are in total?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.28125, "loss": 2.047694444656372 }, { "id": "identity_7126", "image": "FigureQA/images/35047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.109375, "loss": 4.1403374671936035 }, { "id": "identity_281785", "image": "A-OKVQA/images/161725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which instrument is the man in the straw hat playing, with the options being saxophone, clarinet, trumpet, and guitar?\nChoices:\n(A) saxophone\n(B) clarinet\n(C) trumpet\n(D) guitar" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.78125, "loss": 3.8807098865509033 }, { "id": "identity_16594", "image": "CLEVR-Math/images/CLEVR_train_025777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all cylinders. How many objects are left?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.59375, "loss": 2.2852749824523926 }, { "id": "identity_19322", "image": "PlotQA/images/142370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of solid fuel emission in Albania ?" }, { "from": "gpt", "value": "The answer is 27.4541101356744" } ], "clip_score": 35.34375, "loss": 2.7147374153137207 }, { "id": "identity_288517", "image": "TextVQA/images/ed2cef1ef6b652a3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sound made by the bike?" }, { "from": "gpt", "value": "The answer is camisa" } ], "clip_score": 16.171875, "loss": 3.110926628112793 }, { "id": "identity_83025", "image": "Geometry3K/images/223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle PQS?" }, { "from": "gpt", "value": "The answer is x degrees" } ], "clip_score": 30.78125, "loss": 2.9158878326416016 }, { "id": "identity_259035", "image": "PMC-VQA/images/PMC7191268_fig4_161300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which lymph nodes showed activity on both the 6-week and 12-week scans?" }, { "from": "gpt", "value": "The answer is Left mediastinal" } ], "clip_score": 24.625, "loss": 3.1648058891296387 }, { "id": "identity_73254", "image": "TQA/images/question_images/muscle_fiber_7085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different connective tissue sheaths are identified in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.21875, "loss": 4.6512250900268555 }, { "id": "identity_134567", "image": "ChartQA/images/two_col_82003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years have an expenditure of 14 Canadian dollars or more?" }, { "from": "gpt", "value": "The answer is 6 years" } ], "clip_score": 22.96875, "loss": 2.1830356121063232 }, { "id": "identity_111706", "image": "VQA2.0/images/35357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the airplanes?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.734375, "loss": 3.7805888652801514 }, { "id": "identity_167078", "image": "IconQA/images/70558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of small rectangles to large rectangles?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 23.625, "loss": 1.5790616273880005 }, { "id": "identity_140485", "image": "DocVQA/images/flwk0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year did RJRT introduce a cigarette that produces smoke by heating, rather than burning, tobacco?" }, { "from": "gpt", "value": "The answer is 1988" } ], "clip_score": 24.65625, "loss": 1.3253949880599976 }, { "id": "identity_316267", "image": "DocVQA/images/yznh0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the project manager mentioned for Great Western Cities?" }, { "from": "gpt", "value": "The answer is Ken Opp" } ], "clip_score": 20.796875, "loss": 3.096608877182007 }, { "id": "identity_246663", "image": "DocVQA/images/xqfl0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who approves the selection of the Operating Committee chair and vice chair, and what is the maximum number of consecutive terms they can serve?" }, { "from": "gpt", "value": "The answer is The Board; two consecutive terms" } ], "clip_score": 32.1875, "loss": 4.054023742675781 }, { "id": "identity_105952", "image": "PMC-VQA/images/PMC3407244_pone-0041569-g006_147172.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What magnification is likely used in this image?" }, { "from": "gpt", "value": "The answer is 400x magnification" } ], "clip_score": 25.640625, "loss": 1.6772468090057373 }, { "id": "identity_203954", "image": "A-OKVQA/images/28560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of surface is the person on the motorcycle on?" }, { "from": "gpt", "value": "The answer is Gravel or dirt surface" } ], "clip_score": 20.0, "loss": 2.8124585151672363 }, { "id": "identity_233828", "image": "Super-CLEVR/images/superCLEVR_new_000201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relative size of the brown thing in front of the tiny brown matte aeroplane and behind the brown motorbike compared to the yellow scooter?" }, { "from": "gpt", "value": "The answer is The brown bus is larger than the yellow scooter" } ], "clip_score": 32.71875, "loss": 1.8945525884628296 }, { "id": "identity_311182", "image": "TextVQA/images/350cfcd3adaa9891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this bar?" }, { "from": "gpt", "value": "The answer is colony house" } ], "clip_score": 16.3125, "loss": 4.704500675201416 }, { "id": "identity_30683", "image": "TextVQA/images/20a19fc5482d388d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what stove brand is that?" }, { "from": "gpt", "value": "The answer is aga" } ], "clip_score": 26.234375, "loss": 3.5714643001556396 }, { "id": "identity_136992", "image": "MapQA/images/map_12127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of California?" }, { "from": "gpt", "value": "The answer is 233423-241575" } ], "clip_score": 27.859375, "loss": 2.1264936923980713 }, { "id": "identity_169649", "image": "TabMWP/images/8492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of cars seen from Monday to Tuesday?" }, { "from": "gpt", "value": "The answer is 257" } ], "clip_score": 34.3125, "loss": 2.516927480697632 }, { "id": "identity_260338", "image": "TextVQA/images/63d30df88e2aaff3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the destination of the red bus and the number of taxis visible in the image?" }, { "from": "gpt", "value": "The answer is CANAL ST. and 2 taxis" } ], "clip_score": 19.65625, "loss": 3.1180388927459717 }, { "id": "identity_284594", "image": "PMC-VQA/images/PMC9417064_FIG6_386919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging modality was utilized to capture the displayed image? \nOptions:\n(A) X-ray\n(B) MRI\n(C) CT scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.625, "loss": 4.30686092376709 }, { "id": "identity_282501", "image": "TabMWP/images/38248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest ages of the members in the table created by the Japanese club?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 22.6875, "loss": 3.4434471130371094 }, { "id": "identity_35424", "image": "ScienceQA/images/9939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Jeanette's experiment can best answer.\nChoices:\n(A) Do the insides of white boxes get hotter than the insides of black boxes when the boxes are left in the sun?\n(B) Do the temperatures inside boxes depend on the sizes of the boxes?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.15625, "loss": 3.6870627403259277 }, { "id": "identity_128991", "image": "TextVQA/images/8cd6412c8f6ad3fa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the aircraft?" }, { "from": "gpt", "value": "The answer is AIR NATIONAL GUARD" } ], "clip_score": 27.75, "loss": 1.6302742958068848 }, { "id": "identity_229471", "image": "DocVQA/images/yghv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of Mr. William J. Darby, including the street, city, state, and zip code?" }, { "from": "gpt", "value": "The answer is 489 Fifth Ave, NYC, 10017, Thompson's Sta, TN, 37179" } ], "clip_score": 27.375, "loss": 2.4097790718078613 }, { "id": "identity_313195", "image": "CLEVR-Math/images/CLEVR_train_003388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all gray rubber cylinders. Remove all big matte cylinders. How many items remain?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.6875, "loss": 2.2209157943725586 }, { "id": "identity_36127", "image": "DocVQA/images/mgph0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of water is used in water analysis?" }, { "from": "gpt", "value": "The answer is \"B\" Pan Drips" } ], "clip_score": 17.4375, "loss": 4.746026515960693 }, { "id": "identity_125990", "image": "VQA-AS/images/14429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many birds are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.578125, "loss": 2.2503607273101807 }, { "id": "identity_296676", "image": "ScienceQA/images/11082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal has a mouth adapted to consume plant matter?\nChoices:\n(A) goat\n(B) Nile crocodile" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.65625, "loss": 3.8955330848693848 }, { "id": "identity_24370", "image": "TabMWP/images/29453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some friends discussed the sizes of their coin collections. What is the range of the numbers?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 36.34375, "loss": 3.9915847778320312 }, { "id": "identity_101484", "image": "CLEVR-Math/images/CLEVR_train_048657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a metallic appearance?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.859375, "loss": 2.4500856399536133 }, { "id": "identity_75679", "image": "ScienceQA/images/20898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state located at the southeastern corner of the United States?" }, { "from": "gpt", "value": "The answer is Florida" } ], "clip_score": 30.6875, "loss": 2.5768542289733887 }, { "id": "identity_38355", "image": "PMC-VQA/images/PMC7506999_anec12731-fig-0003_258656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the perfusion defects located?\nChoices:\n(A) Lower lobes \n(B) Right lung \n(C) Upper lobes \n(D) Left lung " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.484375, "loss": 3.6435978412628174 }, { "id": "identity_106800", "image": "Super-CLEVR/images/superCLEVR_new_000668.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any objects that are the same size as the green metallic aeroplane?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.734375, "loss": 3.4422035217285156 }, { "id": "identity_298763", "image": "TextVQA/images/34df2fed3d2c0aa3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which spice is displayed on the purple tag?" }, { "from": "gpt", "value": "The answer is cinnamon" } ], "clip_score": 18.1875, "loss": 2.140519618988037 }, { "id": "identity_98589", "image": "TabMWP/images/28246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of soccer balls per gym locker if there are 10 lockers?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 35.75, "loss": 2.237696647644043 }, { "id": "identity_304836", "image": "ScienceQA/images/4962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What property matches this item?\nChoices:\n(A) fuzzy\n(B) slippery" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.8984375, "loss": 4.350347995758057 }, { "id": "identity_9178", "image": "TextVQA/images/81f62dc25199fb80.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what number is on the nose cone?" }, { "from": "gpt", "value": "The answer is 29" } ], "clip_score": 22.109375, "loss": 2.515083074569702 }, { "id": "identity_263954", "image": "AI2D/images/324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there is a sudden decrease in the amount of grass in the food web diagram provided, which of the following would be the most affected?\nChoices:\n(A) lion\n(B) vulture\n(C) zebra\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.65625, "loss": 3.4776768684387207 }, { "id": "identity_285377", "image": "GeoQA+/images/1609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If ∠A is an angle on the circumference of circle O, with ∠A measuring 50°, what is the degree measure of ∠OBC?\nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.15625, "loss": 2.857133150100708 }, { "id": "identity_278429", "image": "DocVQA/images/lgjh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is Social Hour scheduled to take place?" }, { "from": "gpt", "value": "The answer is 6:00 P.M." } ], "clip_score": 13.5625, "loss": 0.7650924921035767 }, { "id": "identity_5491", "image": "GeoQA+/images/3845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,PA切于点A,线段PO交⊙O于点C,连接BC.若∠P=40°,则∠B等于()\nChoices:\n(A) 15°\n(B) 20°\n(C) 25°\n(D) 30°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.46875, "loss": 2.666755199432373 }, { "id": "identity_283335", "image": "A-OKVQA/images/319933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are the individuals in the background engaged in?\nChoices:\n(A) serving customers\n(B) buy donuts\n(C) delivering drinks\n(D) delivering donuts" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.5625, "loss": 4.363951683044434 }, { "id": "identity_220741", "image": "VQA2.0/images/160815.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many street lamps are visible on the left side of the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 21.96875, "loss": 2.5323309898376465 }, { "id": "identity_273009", "image": "VQA2.0/images/111940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these motorcycles pre-owned?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 25.703125, "loss": 3.276398181915283 }, { "id": "identity_77933", "image": "AI2D/images/3850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the veins in the leaf as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Support the leaf and carry water" } ], "clip_score": 28.546875, "loss": 3.1741602420806885 }, { "id": "identity_289798", "image": "PMC-VQA/images/PMC5088359_F1_89863.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining method was utilized for the image in A. woodiana? \nOptions:\n(A) H&E\n(B) Gram stain\n(C) Congo red\n(D) Mallory's Trichrome stain." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.171875, "loss": 3.835498332977295 }, { "id": "identity_307692", "image": "PMC-VQA/images/PMC7251740_Fig1_178955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the origin of the mass observed in the imaging? \nChoices:\n(A) Left ventricle \n(B) Aorta \n(C) Right atrium \n(D) Left atrium" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.875, "loss": 4.317776679992676 }, { "id": "identity_257606", "image": "DocVQA/images/txcx0227_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years of professional experience does Ei Soon Cho have as of the current year, given that the resume was last updated in 1977?" }, { "from": "gpt", "value": "The answer is 2 years" } ], "clip_score": 22.78125, "loss": 2.90486478805542 }, { "id": "identity_186895", "image": "PMC-VQA/images/PMC6245622_Fig1_398898.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of turbulent blood flow in this thyroid ultrasound image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.46875, "loss": 2.721696376800537 }, { "id": "identity_150169", "image": "DocVQA/images/sqjf0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the memo labeled as confidential?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.8125, "loss": 3.370380401611328 }, { "id": "identity_190967", "image": "VQA2.0/images/364221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the water?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 14.4375, "loss": 4.10350227355957 }, { "id": "identity_148950", "image": "MapQA/images/map_8905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the highest percentage of motorists?" }, { "from": "gpt", "value": "The answer is California" } ], "clip_score": 29.078125, "loss": 2.2650082111358643 }, { "id": "identity_91662", "image": "Geometry3K/images/1293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ACD in the figure?" }, { "from": "gpt", "value": "The answer is 102 degrees" } ], "clip_score": 31.875, "loss": 2.0384132862091064 }, { "id": "identity_199123", "image": "A-OKVQA/images/359507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a second bed in the room?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.953125, "loss": 2.9362633228302 }, { "id": "identity_295289", "image": "ScienceQA/images/18815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlight among the following options: Palau, New Zealand, Australia, or Papua New Guinea?\nChoices:\n(A) Palau\n(B) New Zealand\n(C) Australia\n(D) Papua New Guinea" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.4375, "loss": 3.817901134490967 }, { "id": "identity_325058", "image": "PMC-VQA/images/PMC6198379_Fig7_385030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What method was used to capture these images?\nChoices:\n(A) Confocal microscopy \n(B) Transmission electron microscopy \n(C) Fluorescence microscopy \n(D) Scanning electron microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.90625, "loss": 3.8063511848449707 }, { "id": "identity_244273", "image": "PMC-VQA/images/PMC6004303_fig-1_329881.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main differences in resolution and magnification capabilities between the types of microscopy listed, and which type is most suitable for observing cellular structures in detail?" }, { "from": "gpt", "value": "The answer is Light microscopy is most suitable for observing cellular structures" } ], "clip_score": 25.046875, "loss": 1.8558011054992676 }, { "id": "identity_318487", "image": "DocVQA/images/sqdl0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the overall cost?" }, { "from": "gpt", "value": "The answer is $106,871" } ], "clip_score": 22.6875, "loss": 2.63332200050354 }, { "id": "identity_75435", "image": "AI2D/images/341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the second stage of development for the organism in the image?" }, { "from": "gpt", "value": "The answer is Caterpillar" } ], "clip_score": 29.59375, "loss": 1.7797274589538574 }, { "id": "identity_303267", "image": "PMC-VQA/images/PMC6565797_Fig5_493341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging method was used to diagnose the stroke?\nChoices:\n(A) PET scan \n(B) CT scan \n(C) MRI \n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.359375, "loss": 3.623511791229248 }, { "id": "identity_21523", "image": "IconQA/images/41131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a train. Which picture is third?\nChoices:\n(A) bear\n(B) plane\n(C) train" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.21875, "loss": 4.080086708068848 }, { "id": "identity_196971", "image": "PMC-VQA/images/PMC2146996_F1_15412.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of cellular abnormality in this micrograph?" }, { "from": "gpt", "value": "The answer is No signs of cellular abnormality" } ], "clip_score": 28.765625, "loss": 2.3090660572052 }, { "id": "identity_297571", "image": "A-OKVQA/images/292498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which country does this bus operate?\nChoices:\n(A) Canada\n(B) Mexico\n(C) Norway\n(D) Ireland" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.765625, "loss": 3.6582190990448 }, { "id": "identity_177697", "image": "TabMWP/images/18918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest number of words per page in Lamar's book?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 22.03125, "loss": 3.096097469329834 }, { "id": "identity_323996", "image": "DocVQA/images/fzjf0226_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number mentioned at the right bottom of the page?" }, { "from": "gpt", "value": "The answer is 00053930" } ], "clip_score": 11.1875, "loss": 2.381676435470581 }, { "id": "identity_122837", "image": "PMC-VQA/images/PMC8137781_Fig9_446471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the body does this elbow belong to?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 30.375, "loss": 3.6727654933929443 }, { "id": "identity_8184", "image": "DocVQA/images/mmbd0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page no mentioned in this document?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.09375, "loss": 3.0822620391845703 }, { "id": "identity_186845", "image": "Super-CLEVR/images/superCLEVR_new_001181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is golden in color?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 18.109375, "loss": 3.125925302505493 }, { "id": "identity_19869", "image": "GeoQA+/images/1277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB∥CD,直线l交AB于点E,交CD于点F,若∠1=60°,则∠2等于()\nChoices:\n(A) 120°\n(B) 30°\n(C) 40°\n(D) 60°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.203125, "loss": 2.553287982940674 }, { "id": "identity_190624", "image": "PMC-VQA/images/PMC4398354_pone.0123440.g003_378311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any areas of the brain that show no activity in this image?" }, { "from": "gpt", "value": "The answer is Yes, there are blue regions indicating no activity" } ], "clip_score": 29.9375, "loss": 2.753783941268921 }, { "id": "identity_272209", "image": "DocVQA/images/thcn0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relative deviation percentage shown by the EPA for the absolute kidney weight endpoint?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 18.8125, "loss": 1.7798805236816406 }, { "id": "identity_181736", "image": "CLEVR-Math/images/CLEVR_train_042081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither cubes nor spheres?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.296875, "loss": 2.556929349899292 }, { "id": "identity_172988", "image": "IconQA/images/96443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of minutes from 3:00 PM to 3:45 PM?" }, { "from": "gpt", "value": "The answer is 45 minutes" } ], "clip_score": 31.359375, "loss": 1.5020360946655273 }, { "id": "identity_298899", "image": "TabMWP/images/26201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of words the author wrote on Wednesday and Friday, considering the words she wrote in the past 3 days?" }, { "from": "gpt", "value": "The answer is 7,270" } ], "clip_score": 38.71875, "loss": 2.534226894378662 }, { "id": "identity_322646", "image": "DocVQA/images/mxvm0041_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the guest receipt number in this?" }, { "from": "gpt", "value": "The answer is 98116" } ], "clip_score": 24.90625, "loss": 2.554792881011963 }, { "id": "identity_199862", "image": "PMC-VQA/images/PMC8414583_F2_50810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the granule located within the boundaries of a single cell or overlapping multiple cells?" }, { "from": "gpt", "value": "The answer is Within a single cell" } ], "clip_score": 26.453125, "loss": 2.6986804008483887 }, { "id": "identity_26100", "image": "PMC-VQA/images/PMC7894642_F4_371333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which figure shows the homogeneity of the Liqui-Pellet formulation mix? \nChoices:\n(A) Figure 5 \n(B) Figure 7 \n(C) Figure 9 \n(D) Figure 12 " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.109375, "loss": 3.2702584266662598 }, { "id": "identity_137854", "image": "MapQA/images/map_3806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the highest number of actions?" }, { "from": "gpt", "value": "The answer is Montana" } ], "clip_score": 33.71875, "loss": 3.0149195194244385 }, { "id": "identity_104784", "image": "Super-CLEVR/images/superCLEVR_new_000814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest bicycle in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 29.0, "loss": 4.329741954803467 }, { "id": "identity_287245", "image": "TabMWP/images/23400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Between the Minnesota Zoo and the Brookfield Zoo, which zoo charges more for a child ticket?\nChoices:\n(A) Brookfield\n(B) Minnesota" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.4375, "loss": 3.754415512084961 }, { "id": "identity_333335", "image": "PMC-VQA/images/PMC7825226_Fig1_349343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used in this? \nChoices:\n(A) Computed tomography (CT) scan \n(B) Positron emission tomography (PET) scan \n(C) Magnetic resonance imaging (MRI) \n(D) X-ray scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.328125, "loss": 4.142504692077637 }, { "id": "identity_26506", "image": "MapQA/images/map_10098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in the Northeast ?" }, { "from": "gpt", "value": "The answer is 1754-2240" } ], "clip_score": 23.265625, "loss": 2.151003837585449 }, { "id": "identity_318477", "image": "DocVQA/images/nfnw0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Account No.?" }, { "from": "gpt", "value": "The answer is da0608" } ], "clip_score": 18.921875, "loss": 4.182756423950195 }, { "id": "identity_143574", "image": "ChartQA/images/5793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage points between Mexican and Non-Mexican eligible for citizenship?" }, { "from": "gpt", "value": "The answer is 33%" } ], "clip_score": 29.71875, "loss": 2.17633318901062 }, { "id": "identity_191938", "image": "PMC-VQA/images/PMC4988790_f0035_63773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of abnormalities in the brain?" }, { "from": "gpt", "value": "The answer is No signs of abnormalities are visible" } ], "clip_score": 32.65625, "loss": 2.6596128940582275 }, { "id": "identity_84626", "image": "UniGeo/images/1394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle CDB is 30 degrees, what is the area of sector COD?" }, { "from": "gpt", "value": "The answer is (1/6) * (π * r^2), where r is the radius of the circle" } ], "clip_score": 34.8125, "loss": 1.3252977132797241 }, { "id": "identity_152328", "image": "ScienceQA/images/14658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the handles of the scissors?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 35.625, "loss": 3.596168041229248 }, { "id": "identity_66178", "image": "ScienceQA/images/1343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is at the top of the food chain in this marine ecosystem?" }, { "from": "gpt", "value": "The answer is orca" } ], "clip_score": 33.5625, "loss": 2.9162778854370117 }, { "id": "identity_210140", "image": "A-OKVQA/images/432527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the tennis player's footwear?" }, { "from": "gpt", "value": "The answer is Nike" } ], "clip_score": 25.90625, "loss": 3.0304081439971924 }, { "id": "identity_146024", "image": "DocVQA/images/xkhv0228_30.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the National Research Council dependent on for administrative expense?" }, { "from": "gpt", "value": "The answer is endowments, grants, or actual cost contracts" } ], "clip_score": 30.984375, "loss": 3.2183845043182373 }, { "id": "identity_261722", "image": "PMC-VQA/images/PMC8005652_F4_405303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the Okra variety depicted in the image? \nChoices:\n(A) Clemson \n(B) Chantilly \n(C) Star of David \n(D) Eagle Pass" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2265625, "loss": 4.389522552490234 }, { "id": "identity_51362", "image": "ChartQA/images/3299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between U.S. adults who say breaking information that is not checked should be recognized and those who say it is too much to ask?" }, { "from": "gpt", "value": "The answer is 7%" } ], "clip_score": 31.015625, "loss": 2.189566135406494 }, { "id": "identity_256905", "image": "VQA2.0/images/377015.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many surfboards are there in the image, and what is the average height of these surfboards if the tallest one is 9 feet and the shortest one is 6 feet?" }, { "from": "gpt", "value": "The answer is 10 surfboards, average height 7.5 feet" } ], "clip_score": 29.46875, "loss": 1.7982382774353027 }, { "id": "identity_48974", "image": "FigureQA/images/46921.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the bars labeled \"Medium Purple,\" \"Blue,\" and \"Maroon\"?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 24.375, "loss": 2.3930647373199463 }, { "id": "identity_98884", "image": "IconQA/images/29906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers are in the second column from the right?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.609375, "loss": 2.617011070251465 }, { "id": "identity_171193", "image": "IconQA/images/54253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you pick two marbles one after the other without replacement, what is the probability that both are blue?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 31.109375, "loss": 1.7318501472473145 }, { "id": "identity_22344", "image": "ScienceQA/images/10747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Minnesota?\nChoices:\n(A) Saint Paul\n(B) Minneapolis\n(C) Columbus\n(D) New Orleans" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.25, "loss": 4.3480658531188965 }, { "id": "identity_121162", "image": "TextVQA/images/039f33b5834a956b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any food item visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.28125, "loss": 2.996347427368164 }, { "id": "identity_296661", "image": "A-OKVQA/images/176969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: To what type of object have the toothbrushes been altered to resemble?\nChoices:\n(A) dogs\n(B) giraffes\n(C) horses\n(D) people" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.875, "loss": 3.9159774780273438 }, { "id": "identity_157964", "image": "ScienceQA/images/17161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the large island located southeast of the highlighted country?" }, { "from": "gpt", "value": "The answer is Australia" } ], "clip_score": 32.78125, "loss": 3.272930860519409 }, { "id": "identity_41573", "image": "DVQA/images/bar_train_00104295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object category has the highest number of people preferring 'jail'?" }, { "from": "gpt", "value": "The answer is input" } ], "clip_score": 31.484375, "loss": 3.874764919281006 }, { "id": "identity_37630", "image": "TextVQA/images/0d9b5bca13a9e195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does it say on this van's hood?" }, { "from": "gpt", "value": "The answer is fire" } ], "clip_score": 23.1875, "loss": 2.8246192932128906 }, { "id": "identity_278434", "image": "GeoQA+/images/1435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, point P outside circle O is connected to the two tangents PA and PB to circle O, with tangents at points A and B respectively. Point O intersects circle O at point C, and point D is a moving point on arc AEB that does not coincide with points A or C. Lines AD and CD are drawn. If ∠APB = 60°, what is the degree measure of ∠ADC?\nChoices:\n(A) 15\n(B) 20°\n(C) 25°\n(D) 30°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.4375, "loss": 2.3193702697753906 }, { "id": "identity_157041", "image": "ScienceQA/images/9901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly west of Virginia?" }, { "from": "gpt", "value": "The answer is West Virginia" } ], "clip_score": 26.375, "loss": 2.3391776084899902 }, { "id": "identity_95054", "image": "IconQA/images/46696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If we remove 2 triangles from the first row, how many triangles will be left in total?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 30.703125, "loss": 2.3902409076690674 }, { "id": "identity_40002", "image": "FigureQA/images/93677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what approximate x-axis value do Cyan and Dark Green intersect?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 22.359375, "loss": 2.2326457500457764 }, { "id": "identity_217796", "image": "PMC-VQA/images/PMC3937238_F2_270200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the object being examined?" }, { "from": "gpt", "value": "The answer is Spherical" } ], "clip_score": 21.4375, "loss": 2.985229969024658 }, { "id": "identity_154591", "image": "TQA/images/question_images/simple_machines_9243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man's position relative to the load?" }, { "from": "gpt", "value": "The answer is He is standing on a platform pulling the rope diagonally upwards" } ], "clip_score": 36.90625, "loss": 2.746849298477173 }, { "id": "identity_238656", "image": "FigureQA/images/22444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the pie chart is represented by Crimson, and how does it compare to the percentage represented by Dark Turquoise?" }, { "from": "gpt", "value": "The answer is Crimson: 60%, Dark Turquoise: 40%" } ], "clip_score": 43.0, "loss": 1.0769199132919312 }, { "id": "identity_302795", "image": "Super-CLEVR/images/superCLEVR_new_001565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else that has the same shape as this blue rubber object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 14.9140625, "loss": 4.91159200668335 }, { "id": "identity_221964", "image": "TextVQA/images/3877144880f54acd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the height of the Lewyt vacuum cleaner as mentioned in point number 7?" }, { "from": "gpt", "value": "The answer is Just 21\" high" } ], "clip_score": 31.5, "loss": 3.80401873588562 }, { "id": "identity_79914", "image": "GeoQA+/images/5329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is 5 and the central angles AOB and COD are supplementary, what is the relationship between the lengths of chords AB and CD?" }, { "from": "gpt", "value": "The answer is Length of AB / Length of CD = sin(AOB) / sin(COD)" } ], "clip_score": 35.84375, "loss": 1.0522451400756836 }, { "id": "identity_176540", "image": "IconQA/images/1642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more green circles or orange circles?" }, { "from": "gpt", "value": "The answer is The same number" } ], "clip_score": 30.53125, "loss": 3.8075759410858154 }, { "id": "identity_304243", "image": "DVQA/images/bar_train_00151139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are liked by fewer than 2 individuals in at least one category?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 28.21875, "loss": 3.669337511062622 }, { "id": "identity_327563", "image": "DocVQA/images/ymvx0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for \"Outdoor Signs\"?" }, { "from": "gpt", "value": "The answer is Cunningham" } ], "clip_score": 15.96875, "loss": 2.8926942348480225 }, { "id": "identity_9075", "image": "DocVQA/images/yzlh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is date mentioned top of the page" }, { "from": "gpt", "value": "The answer is 3-3-64" } ], "clip_score": 16.953125, "loss": 2.860870122909546 }, { "id": "identity_235241", "image": "ChartQA/images/multi_col_20008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average of the maximum Q1 average daily rate and the minimum Q4 average daily rate in hotels in Toronto from 2012 to 2017?" }, { "from": "gpt", "value": "The answer is 176" } ], "clip_score": 24.171875, "loss": 2.2802586555480957 }, { "id": "identity_292341", "image": "PMC-VQA/images/PMC7364197_fig1_214940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what is the significance of the white arrowhead pointing to?\nChoices:\n(A) Reactive bone \n(B) Marrow infection \n(C) Fracture position \n(D) Nerve damage" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.515625, "loss": 4.502471446990967 }, { "id": "identity_194510", "image": "A-OKVQA/images/563371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of water body is the elephant standing in?" }, { "from": "gpt", "value": "The answer is River or stream" } ], "clip_score": 28.0, "loss": 3.224149703979492 }, { "id": "identity_22745", "image": "ScienceQA/images/7122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which state is highlighted?\nChoices:\n(A) Missouri\n(B) Ohio\n(C) Iowa\n(D) Illinois" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.765625, "loss": 3.3876967430114746 }, { "id": "identity_61118", "image": "FigureQA/images/72735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the initial value of Gold at x-axis label 0?" }, { "from": "gpt", "value": "The answer is Approximately 30" } ], "clip_score": 23.328125, "loss": 2.883758783340454 }, { "id": "identity_305613", "image": "DocVQA/images/lycj0037_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where can the labels used to identify these tobaccos be obtained?" }, { "from": "gpt", "value": "The answer is Wayne Sanders, Macon Q.A." } ], "clip_score": 18.015625, "loss": 4.164478778839111 }, { "id": "identity_173125", "image": "TabMWP/images/5732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one bow tie is selected at random, what is the probability that it is either a suede batwing or a cotton butterfly?" }, { "from": "gpt", "value": "The answer is 4/15 + 3/15 = 7/15" } ], "clip_score": 35.875, "loss": 1.2270491123199463 }, { "id": "identity_275148", "image": "FigureQA/images/77843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Dark Red and Medium Purple intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.265625, "loss": 3.286306858062744 }, { "id": "identity_253563", "image": "MapQA/images/map_7366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border Indiana, which state has the highest percentage of centers, and what is that percentage?" }, { "from": "gpt", "value": "The answer is Ohio, 3.9%-4.0%" } ], "clip_score": 35.03125, "loss": 2.3277368545532227 }, { "id": "identity_125585", "image": "TextVQA/images/74f0d5689289022f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles have a purple cap on the top shelf?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 27.1875, "loss": 4.393394470214844 }, { "id": "identity_199214", "image": "PMC-VQA/images/PMC7850656_F1_357394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the imaging technique used to produce image B?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.875, "loss": 2.4190056324005127 }, { "id": "identity_21745", "image": "A-OKVQA/images/172690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do they want to prevent the ball from touching?\nChoices:\n(A) grass\n(B) sand\n(C) people\n(D) net" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.203125, "loss": 4.3006591796875 }, { "id": "identity_143973", "image": "ChartQA/images/45065189000729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Gender Inequality Index value for the United Kingdom less than 0.25?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.15625, "loss": 3.470604658126831 }, { "id": "identity_102499", "image": "TabMWP/images/21504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you want to attend both the Spanish club meeting and the football game, how much time do you have in between the two events?" }, { "from": "gpt", "value": "The answer is No time in between" } ], "clip_score": 28.3125, "loss": 3.214810848236084 }, { "id": "identity_77002", "image": "AI2D/images/2509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which date does the Winter Solstice occur?" }, { "from": "gpt", "value": "The answer is December 21-22" } ], "clip_score": 28.296875, "loss": 1.6241832971572876 }, { "id": "identity_132712", "image": "Super-CLEVR/images/superCLEVR_new_001027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Blue and yellow" } ], "clip_score": 20.484375, "loss": 3.9950077533721924 }, { "id": "identity_289423", "image": "DocVQA/images/qrwl0079_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total price for the Camel Filter 85 prototype?" }, { "from": "gpt", "value": "The answer is $6,956" } ], "clip_score": 27.0625, "loss": 2.546095848083496 }, { "id": "identity_79647", "image": "Geometry3K/images/9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle at the center of the circle subtended by the arc between the points of tangency?" }, { "from": "gpt", "value": "The answer is 280°" } ], "clip_score": 35.8125, "loss": 2.5357911586761475 }, { "id": "identity_226416", "image": "DVQA/images/bar_train_00095258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are preferred by more than 5 people in both categories?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 34.0625, "loss": 2.2691004276275635 }, { "id": "identity_233049", "image": "AI2D/images/3334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the organelle labeled with the number 9 and describe its primary function in the cell" }, { "from": "gpt", "value": "The answer is The organelle is the mitochondrion, and its primary function is to produce energy (ATP) through cellular respiration" } ], "clip_score": 27.03125, "loss": 1.4050407409667969 }, { "id": "identity_102337", "image": "IconQA/images/94292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more dogs or houses in the image?" }, { "from": "gpt", "value": "The answer is Dogs" } ], "clip_score": 24.96875, "loss": 2.3091237545013428 }, { "id": "identity_98321", "image": "IconQA/images/43100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the height of the triangle?" }, { "from": "gpt", "value": "The answer is 3.464 cm" } ], "clip_score": 33.8125, "loss": 2.3877127170562744 }, { "id": "identity_72198", "image": "AI2D/images/969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which inflorescence type is characterized by having a main axis that branches repeatedly?" }, { "from": "gpt", "value": "The answer is Panicle" } ], "clip_score": 32.125, "loss": 2.868284225463867 }, { "id": "identity_276473", "image": "TabMWP/images/17761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of the 5 pounds of bittersweet chocolate that Britney purchased?" }, { "from": "gpt", "value": "The answer is 13.35" } ], "clip_score": 33.34375, "loss": 2.1658642292022705 }, { "id": "identity_23807", "image": "PMC-VQA/images/PMC6711709_fig5_29123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the dashed arrows indicate in the image?\nChoices:\n(A) protein localization \n(B) nucleolar mass \n(C) mitochondria distribution \n(D) lipid droplets localization " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.828125, "loss": 4.8835248947143555 }, { "id": "identity_17270", "image": "DVQA/images/bar_train_00048279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the left in each group?" }, { "from": "gpt", "value": "The answer is crew" } ], "clip_score": 21.46875, "loss": 3.240442991256714 }, { "id": "identity_286246", "image": "IconQA/images/53987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) probable\n(B) impossible\n(C) unlikely\n(D) certain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.890625, "loss": 3.9679558277130127 }, { "id": "identity_118079", "image": "A-OKVQA/images/308947.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What city's fire department is responding to the emergency?" }, { "from": "gpt", "value": "The answer is N.Y. (New York)" } ], "clip_score": 27.0, "loss": 1.942503571510315 }, { "id": "identity_242427", "image": "FigureQA/images/87289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between which x-axis values does Light Green intersect with both Periwinkle and Light Gold?" }, { "from": "gpt", "value": "The answer is Between x = 50 and x = 70" } ], "clip_score": 23.4375, "loss": 1.6330912113189697 }, { "id": "identity_26261", "image": "A-OKVQA/images/468548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is happening to the bananas in water?\nChoices:\n(A) storage\n(B) injected\n(C) fertilizing\n(D) washing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.015625, "loss": 3.948702335357666 }, { "id": "identity_162983", "image": "GeoQA+/images/2026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is tangent to circle O at point B and angle A is 42 degrees, what is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is 24 degrees" } ], "clip_score": 37.15625, "loss": 2.20780086517334 }, { "id": "identity_108512", "image": "TextVQA/images/8aa8c4cf9db9547b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trees are lining the street?" }, { "from": "gpt", "value": "The answer is Likely to be a species of evergreen trees" } ], "clip_score": 23.609375, "loss": 3.253065347671509 }, { "id": "identity_129858", "image": "Super-CLEVR/images/superCLEVR_new_001020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.890625, "loss": 3.4217193126678467 }, { "id": "identity_70864", "image": "AI2D/images/322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the kelp forest food web, which organism is at the top of the food chain?" }, { "from": "gpt", "value": "The answer is Sharks" } ], "clip_score": 29.828125, "loss": 2.4743688106536865 }, { "id": "identity_171702", "image": "TabMWP/images/18787.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the studio can only accommodate 150 dancers in total, how many more dancers can join any class before reaching capacity?" }, { "from": "gpt", "value": "The answer is 150 - (68+24+35+47) = 150 - 174 = -24, which means the studio is already over capacity by 24 dancers" } ], "clip_score": 30.671875, "loss": 0.975711464881897 }, { "id": "identity_252463", "image": "PMC-VQA/images/PMC4429208_fig1_385711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What medical procedure is likely being performed in the image, and what is the purpose of this procedure?" }, { "from": "gpt", "value": "The answer is Diagnosing or treating heart conditions" } ], "clip_score": 25.390625, "loss": 3.447705030441284 }, { "id": "identity_33316", "image": "ChartQA/images/2452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Boomer who favors legalization of marijuana?" }, { "from": "gpt", "value": "The answer is 0.63" } ], "clip_score": 35.75, "loss": 2.45967698097229 }, { "id": "identity_104756", "image": "VQA-AS/images/9854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a blanket on the couch?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.953125, "loss": 3.310195207595825 }, { "id": "identity_287347", "image": "PMC-VQA/images/PMC4035491_Fig4_292125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of image displays fleck-like calcifications? \nOptions:\n(A) Coronal CT image\n(B) Sagittal T2-weighted MRI image\n(C) Axial T2-weighted MRI image\n(D) Axial T1-weighted MRI image" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.015625, "loss": 4.006122589111328 }, { "id": "identity_85623", "image": "GeoQA+/images/265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of segment MC to segment CN?" }, { "from": "gpt", "value": "The answer is 2/3" } ], "clip_score": 21.71875, "loss": 1.557634949684143 }, { "id": "identity_276942", "image": "Super-CLEVR/images/superCLEVR_new_000518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer cyan objects in front of the metal aeroplane compared to the number of rubber dirtbikes on the left side of the big green rubber aeroplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 22.140625, "loss": 5.075296878814697 }, { "id": "identity_90460", "image": "UniGeo/images/3142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the rhombus ABCD, if ∠A = 60° and point C is folded onto line DP at point C', what is the ratio of the area of triangle ADC' to the area of the original rhombus ABCD?" }, { "from": "gpt", "value": "The answer is 1:2" } ], "clip_score": 36.21875, "loss": 1.2328323125839233 }, { "id": "identity_128417", "image": "PMC-VQA/images/PMC6922538_F2_87534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lesion located in the left or right lobe of the organ?" }, { "from": "gpt", "value": "The answer is Right lobe" } ], "clip_score": 27.59375, "loss": 3.413041591644287 }, { "id": "identity_160046", "image": "UniGeo/images/3176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if DE is parallel to BC and AE:EC is 5:2, what is the ratio of AD:DB?" }, { "from": "gpt", "value": "The answer is 5/2" } ], "clip_score": 34.25, "loss": 1.7897433042526245 }, { "id": "identity_2652", "image": "IconQA/images/41225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many triangles are there?\nChoices:\n(A) 2\n(B) 1\n(C) 9\n(D) 6\n(E) 4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.390625, "loss": 2.9807887077331543 }, { "id": "identity_241459", "image": "DVQA/images/bar_train_00143006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the value of actor and twice the value of plan?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 34.5, "loss": 2.375542402267456 }, { "id": "identity_26050", "image": "IconQA/images/45824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Chase is shopping this morning. The clock shows the time. What time is it?\nChoices:\n(A) 11:00 P.M.\n(B) 11:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.859375, "loss": 3.7186684608459473 }, { "id": "identity_183511", "image": "A-OKVQA/images/26164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the snowboarder wearing gloves?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.109375, "loss": 3.180870771408081 }, { "id": "identity_211202", "image": "A-OKVQA/images/425497.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the snowboard in the foreground?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 27.484375, "loss": 3.5590388774871826 }, { "id": "identity_167689", "image": "TabMWP/images/38191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days did Emma see more than 6 birds?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.3125, "loss": 2.3889830112457275 }, { "id": "identity_151426", "image": "TQA/images/question_images/atomic_mass_number_9013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average atomic mass of the element shown in the image?" }, { "from": "gpt", "value": "The answer is 95.94" } ], "clip_score": 31.40625, "loss": 1.2107807397842407 }, { "id": "identity_259123", "image": "PMC-VQA/images/PMC7934521_Fig2_384858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the path of the contrast agent from the injection site in the feeding artery to the alveoli, including all major structures it passes through" }, { "from": "gpt", "value": "The answer is Feeding artery, arterioles, capillaries, alveoli" } ], "clip_score": 30.703125, "loss": 1.726033329963684 }, { "id": "identity_116610", "image": "PMC-VQA/images/PMC6392508_F3_441739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this brain scan show any signs of hydrocephalus?" }, { "from": "gpt", "value": "The answer is No signs of hydrocephalus" } ], "clip_score": 32.03125, "loss": 2.4829771518707275 }, { "id": "identity_189934", "image": "VQA2.0/images/430522.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is taking place in the foreground?" }, { "from": "gpt", "value": "The answer is Kite flying" } ], "clip_score": 26.296875, "loss": 2.5550849437713623 }, { "id": "identity_71112", "image": "TQA/images/question_images/parts_skeleton_7275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bone is connected to the os coxae?" }, { "from": "gpt", "value": "The answer is Femur" } ], "clip_score": 29.53125, "loss": 2.9992713928222656 }, { "id": "identity_130512", "image": "A-OKVQA/images/474069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the statue in the left of the image depicting?" }, { "from": "gpt", "value": "The answer is A historical or religious figure" } ], "clip_score": 18.78125, "loss": 3.531041145324707 }, { "id": "identity_22426", "image": "DVQA/images/bar_train_00157108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm with lowest accuracy?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 31.15625, "loss": 2.8700125217437744 }, { "id": "identity_9393", "image": "Geometry3K/images/344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the kite.\nChoices:\n(A) 42\n(B) 52.5\n(C) 54\n(D) 63" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.5625, "loss": 2.9361929893493652 }, { "id": "identity_89187", "image": "GeoQA+/images/5918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the square ABCD, if ∠BCE is 70°, what is the measure of ∠AEB?" }, { "from": "gpt", "value": "The answer is 160°" } ], "clip_score": 36.8125, "loss": 1.7517237663269043 }, { "id": "identity_79461", "image": "GeoQA+/images/2063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the sector area of the cone's lateral surface is 65π cm² and the arc length is 13π cm, what is the circumference of the base of the cone?" }, { "from": "gpt", "value": "The answer is 13π cm" } ], "clip_score": 32.25, "loss": 1.5817620754241943 }, { "id": "identity_152250", "image": "ScienceQA/images/13946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sole of the boots?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.09375, "loss": 3.6200428009033203 }, { "id": "identity_256169", "image": "DocVQA/images/jggn0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Disc ID and the date mentioned in this document?" }, { "from": "gpt", "value": "The answer is Disc ID: 29087901, Date: 20010326" } ], "clip_score": 27.09375, "loss": 2.013145685195923 }, { "id": "identity_263349", "image": "UniGeo/images/2960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, point P is a point on the diagonal AC of rectangle ABCD. Line EF is drawn parallel to BC passing through point P, intersecting AB and CD at E and F respectively. PB and PD are connected. If AE = 2 and PF = 8, what is the area of the shaded region in the diagram? \nChoices:\n(A) 10\n(B) 12\n(C) 16\n(D) 18" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.4375, "loss": 2.1980972290039062 }, { "id": "identity_66100", "image": "AI2D/images/1980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of dragonfly nymphs decreased, what would be the likely effect on the tadpole population?" }, { "from": "gpt", "value": "The answer is Increase in tadpole population" } ], "clip_score": 32.46875, "loss": 2.4145052433013916 }, { "id": "identity_210918", "image": "PMC-VQA/images/PMC9200539_fig6_311851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.75, "loss": 2.9165802001953125 }, { "id": "identity_308433", "image": "ScienceQA/images/15694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy was transferred from () to ().\nChoices:\n(A) each battery . . . the surroundings\n(B) the surroundings . . . each battery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.78125, "loss": 3.957602024078369 }, { "id": "identity_175764", "image": "IconQA/images/64365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more stars in the first row or in the second row?" }, { "from": "gpt", "value": "The answer is The same number" } ], "clip_score": 32.59375, "loss": 3.8088390827178955 }, { "id": "identity_71619", "image": "AI2D/images/109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which stage follows immediately after the 4th instar in the locust life cycle?" }, { "from": "gpt", "value": "The answer is 5th instar" } ], "clip_score": 30.625, "loss": 1.3043138980865479 }, { "id": "identity_1398", "image": "DocVQA/images/rrcw0217_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which forum is mentioned in the header of the document?" }, { "from": "gpt", "value": "The answer is ERT/HRT Expert Forums" } ], "clip_score": 23.296875, "loss": 3.9712412357330322 }, { "id": "identity_106939", "image": "VQA2.0/images/448609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible logos or brand names in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 11.421875, "loss": 2.813577651977539 }, { "id": "identity_140890", "image": "DocVQA/images/ghgw0023_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Item ID mentioned in the document?" }, { "from": "gpt", "value": "The answer is B-9" } ], "clip_score": 21.90625, "loss": 3.601701259613037 }, { "id": "identity_111822", "image": "TextVQA/images/a9168a42e98d69df.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What symbol is located at the top of the coin?" }, { "from": "gpt", "value": "The answer is A crown" } ], "clip_score": 19.796875, "loss": 4.669498443603516 }, { "id": "identity_42784", "image": "DocVQA/images/rrvw0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the general sentiment about the meeting in Prague according to the note?" }, { "from": "gpt", "value": "The answer is Less and less attended and seems less important" } ], "clip_score": 24.78125, "loss": 4.640893459320068 }, { "id": "identity_202437", "image": "PMC-VQA/images/PMC6321279_ijms-19-03905-f003_419394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is visible in the middle of the MRI scan?" }, { "from": "gpt", "value": "The answer is Third ventricle" } ], "clip_score": 32.71875, "loss": 3.7180161476135254 }, { "id": "identity_87612", "image": "UniGeo/images/1590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If triangles ADB and AEC are similar, and the lengths AB, DB, and EC are 3, 2, and 6 respectively, find the ratio of the areas of triangle ADB to triangle AEC" }, { "from": "gpt", "value": "The answer is 1/4" } ], "clip_score": 34.375, "loss": 1.8227362632751465 }, { "id": "identity_276467", "image": "DocVQA/images/qfjg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Two weeks ago, where did he pass?" }, { "from": "gpt", "value": "The answer is Camp Miniwauca" } ], "clip_score": 16.765625, "loss": 4.0067548751831055 }, { "id": "identity_214782", "image": "VQA2.0/images/440689.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person in the foreground wearing a helmet?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.546875, "loss": 3.5292320251464844 }, { "id": "identity_53532", "image": "DocVQA/images/rrgj0223_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of shares held by non-promoter non-institutions?" }, { "from": "gpt", "value": "The answer is 12978870" } ], "clip_score": 23.984375, "loss": 2.391690969467163 }, { "id": "identity_64936", "image": "AI2D/images/327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fossil is depicted in the second layer from the top?" }, { "from": "gpt", "value": "The answer is Fish fossil" } ], "clip_score": 32.90625, "loss": 3.121774911880493 }, { "id": "identity_113317", "image": "PMC-VQA/images/PMC5459700_F1_175519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measurement technique used to determine the distance between \"T0 APL\" and \"T1 APL\"?" }, { "from": "gpt", "value": "The answer is CT imaging with calibrated measurement tools" } ], "clip_score": 31.4375, "loss": 3.4918172359466553 }, { "id": "identity_174332", "image": "IconQA/images/101651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you cut 0.5 inches off the tip of the nail, how long will the remaining part be?" }, { "from": "gpt", "value": "The answer is 2 inches" } ], "clip_score": 32.53125, "loss": 2.406644105911255 }, { "id": "identity_96952", "image": "TabMWP/images/7526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern in the table continues, what y-value corresponds to an x-value of 15?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 24.203125, "loss": 2.003889799118042 }, { "id": "identity_171782", "image": "IconQA/images/14739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you arrange the orange ovals in groups of 4, how many full groups will you have?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 37.53125, "loss": 2.2766568660736084 }, { "id": "identity_74771", "image": "AI2D/images/3259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part is the site of protein synthesis?" }, { "from": "gpt", "value": "The answer is Ribosomes" } ], "clip_score": 27.265625, "loss": 2.01767635345459 }, { "id": "identity_155153", "image": "ScienceQA/images/3312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature that forms the northern border of the highlighted country?" }, { "from": "gpt", "value": "The answer is A landform (state border)" } ], "clip_score": 26.359375, "loss": 3.734642744064331 }, { "id": "identity_334330", "image": "VizWiz/images/VizWiz_train_00007638.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this one salt or is that one salt?" }, { "from": "gpt", "value": "The answer is left salt" } ], "clip_score": 28.703125, "loss": 3.5385992527008057 }, { "id": "identity_153107", "image": "ScienceQA/images/5521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the animal shown in the image?" }, { "from": "gpt", "value": "The answer is Muskox" } ], "clip_score": 34.46875, "loss": 3.1341421604156494 }, { "id": "identity_184569", "image": "A-OKVQA/images/248229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the natural debris scattered on the beach?" }, { "from": "gpt", "value": "The answer is Seaweed" } ], "clip_score": 24.515625, "loss": 2.4662747383117676 }, { "id": "identity_92797", "image": "TabMWP/images/9008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the percentage change in the number of employees with health care benefits from 2010 to 2014?" }, { "from": "gpt", "value": "The answer is -27.03%" } ], "clip_score": 37.71875, "loss": 2.0787265300750732 }, { "id": "identity_226528", "image": "ScienceQA/images/3080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average air temperature of the regions within the outlined area and the surrounding area within a 5-degree latitude radius?" }, { "from": "gpt", "value": "The answer is 26°C" } ], "clip_score": 27.296875, "loss": 2.3100194931030273 }, { "id": "identity_71061", "image": "ScienceQA/images/17283.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of rock is shown in the image?" }, { "from": "gpt", "value": "The answer is Sandstone" } ], "clip_score": 30.140625, "loss": 3.328378915786743 }, { "id": "identity_132825", "image": "VQA2.0/images/539180.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the seats in the stadium?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.4375, "loss": 3.750176191329956 }, { "id": "identity_140080", "image": "DocVQA/images/nryf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the flight number for the second leg of the journey?" }, { "from": "gpt", "value": "The answer is 125" } ], "clip_score": 18.875, "loss": 2.517850399017334 }, { "id": "identity_204853", "image": "A-OKVQA/images/390658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clocks are there in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.734375, "loss": 2.267209768295288 }, { "id": "identity_238094", "image": "PMC-VQA/images/PMC7189031_os12665-fig-0004_160452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key radiographic features that differentiate a transverse osteotomy combined with total hip arthroplasty (THA) from other surgical procedures such as hip dislocation surgery, arthroscopy, and craniotomy, and how do these features manifest in the given radiographic image?" }, { "from": "gpt", "value": "The answer is transverse osteotomy combined with total hip arthroplasty" } ], "clip_score": 29.9375, "loss": 1.6010252237319946 }, { "id": "identity_60323", "image": "DVQA/images/bar_train_00194929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy value for the 'worker' algorithm across all datasets?" }, { "from": "gpt", "value": "The answer is 6.33" } ], "clip_score": 30.03125, "loss": 2.1235249042510986 }, { "id": "identity_131265", "image": "Super-CLEVR/images/superCLEVR_new_001892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest airplane in the image?" }, { "from": "gpt", "value": "The answer is The smallest airplane is purple" } ], "clip_score": 23.4375, "loss": 2.5991523265838623 }, { "id": "identity_43250", "image": "DVQA/images/bar_train_00015221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the highest total number of units sold for all items combined?" }, { "from": "gpt", "value": "The answer is mud" } ], "clip_score": 31.71875, "loss": 3.627941608428955 }, { "id": "identity_57488", "image": "MapQA/images/map_11939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of club numbers in the Midwest region?" }, { "from": "gpt", "value": "The answer is 4748882-5221389" } ], "clip_score": 27.515625, "loss": 2.4369585514068604 }, { "id": "identity_273191", "image": "FigureQA/images/64994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Maroon and Cornflower intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.171875, "loss": 3.9156463146209717 }, { "id": "identity_337925", "image": "DVQA/images/bar_train_00179988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars?" }, { "from": "gpt", "value": "The answer is skin" } ], "clip_score": 21.296875, "loss": 4.430583953857422 }, { "id": "identity_80596", "image": "Geometry3K/images/31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the angle marked as 2x°?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 31.3125, "loss": 1.7703206539154053 }, { "id": "identity_53377", "image": "FigureQA/images/86252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what point on the x-axis does Crimson have its lowest value?" }, { "from": "gpt", "value": "The answer is Approximately at 60 on the x-axis" } ], "clip_score": 25.71875, "loss": 1.8800588846206665 }, { "id": "identity_140430", "image": "FigureQA/images/97096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Dark Gray on the y-axis label?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 29.5625, "loss": 2.5225682258605957 }, { "id": "identity_272679", "image": "PMC-VQA/images/PMC7403371_Fig1_226922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the abnormality depicted in the image. \nOptions:\n(A) Ventricular septal defect,\n(B) Patent ductus arteriosus,\n(C) Atrial septal defect,\n(D) Tetralogy of Fallot." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.09375, "loss": 4.303154945373535 }, { "id": "identity_321791", "image": "ScienceQA/images/13369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Phoenix\n(B) Helena\n(C) Billings\n(D) Providence" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 4.103692054748535 }, { "id": "identity_183237", "image": "PMC-VQA/images/PMC3551772_pone-0054969-g001_180501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see any bones in this image?" }, { "from": "gpt", "value": "The answer is Yes, hip bones and spine" } ], "clip_score": 25.25, "loss": 3.0137808322906494 }, { "id": "identity_315625", "image": "AI2D/images/3039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram above, what system is depicted?\nChoices:\n(A) Circulatory\n(B) Lymphatic\n(C) Cellular\n(D) Endocrine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.53125, "loss": 3.7977993488311768 }, { "id": "identity_323965", "image": "UniGeo/images/2386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知:在边长为12的正方形中,有一个小正方形,其中E、F、G分别在AB、BC、FD上.若BF=3,则BE长为()\nChoices:\n(A) 1\n(B) 2.5\n(C) 2.25\n(D) 1.5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.953125, "loss": 2.5067977905273438 }, { "id": "identity_95158", "image": "IconQA/images/37991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the current time is shown in the image, how many quarter-hours have passed since noon?" }, { "from": "gpt", "value": "The answer is 8 quarter-hours" } ], "clip_score": 27.515625, "loss": 1.7735201120376587 }, { "id": "identity_111096", "image": "TextVQA/images/e874aec4a036fb1f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What league is this football match a part of?" }, { "from": "gpt", "value": "The answer is Barclays Premier League" } ], "clip_score": 27.171875, "loss": 1.8400042057037354 }, { "id": "identity_192920", "image": "PMC-VQA/images/PMC6572828_Fig2_495060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique could have been used to produce this image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) staining" } ], "clip_score": 29.828125, "loss": 0.9208889007568359 }, { "id": "identity_69343", "image": "ScienceQA/images/19631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is a secondary consumer but not a predator in this food web?" }, { "from": "gpt", "value": "The answer is Sea cucumber" } ], "clip_score": 30.28125, "loss": 1.9990553855895996 }, { "id": "identity_8098", "image": "UniGeo/images/2225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,有一斜坡AB,坡角∠B=30°,其水平长度BC为30米,则坡面AB的长为()\nChoices:\n(A) 15米\n(B) 15√{3}米\n(C) 20√{3}米\n(D) 60米" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.015625, "loss": 2.857032299041748 }, { "id": "identity_91071", "image": "GeoQA+/images/4700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the area of triangle AEF to the area of quadrilateral BCEF?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 34.03125, "loss": 1.426031231880188 }, { "id": "identity_21009", "image": "AI2D/images/995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which has glands on the hair?\nChoices:\n(A) Glandular hair\n(B) Simple unicellular hair\n(C) Foot hair\n(D) Head hair" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.828125, "loss": 3.6749420166015625 }, { "id": "identity_131775", "image": "PMC-VQA/images/PMC3550705_Fig1_180401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is primarily being assessed in this image?" }, { "from": "gpt", "value": "The answer is Retina" } ], "clip_score": 23.703125, "loss": 3.257457733154297 }, { "id": "identity_83761", "image": "UniGeo/images/2104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given quadrilateral ABCD, if AB is parallel to CD, AD is parallel to BC, and the angle bisectors of ∠BAD and ∠ADC intersect BC at points E and F respectively, what is the length of BC if AB=5 and EF=2?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.1875, "loss": 2.849513292312622 }, { "id": "identity_122315", "image": "A-OKVQA/images/479595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any trees visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.546875, "loss": 3.2802956104278564 }, { "id": "identity_182114", "image": "A-OKVQA/images/144018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas can be seen in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.53125, "loss": 2.281744956970215 }, { "id": "identity_134148", "image": "DVQA/images/bar_train_00058825.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the 'impact' category across all stacks?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 26.0, "loss": 2.720731019973755 }, { "id": "identity_110162", "image": "TextVQA/images/65e0dd541ab2a106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many records are displayed in the bottom row?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 17.65625, "loss": 2.489839792251587 }, { "id": "identity_165909", "image": "GeoQA+/images/5900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given rectangle ABCD, if AB = 6 and BC = 8, what is the measure of angle AOB?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 32.15625, "loss": 2.5223286151885986 }, { "id": "identity_209336", "image": "PMC-VQA/images/PMC4567369_f01_422971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any sinus cavities in the image?" }, { "from": "gpt", "value": "The answer is Yes, maxillary sinuses are visible" } ], "clip_score": 28.65625, "loss": 2.7678263187408447 }, { "id": "identity_11155", "image": "TextVQA/images/cf6a3aef41cfef1d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is being presented?" }, { "from": "gpt", "value": "The answer is charleroi 2035" } ], "clip_score": 26.453125, "loss": 1.598575234413147 }, { "id": "identity_210233", "image": "VQA2.0/images/387514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the equipment attached to the front of the motorcycle?" }, { "from": "gpt", "value": "The answer is Siren or speaker" } ], "clip_score": 21.265625, "loss": 3.8403878211975098 }, { "id": "identity_96917", "image": "IconQA/images/97655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the worm?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 27.359375, "loss": 2.3414506912231445 }, { "id": "identity_279400", "image": "A-OKVQA/images/407820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be found under the red and white sign?\nChoices:\n(A) boy\n(B) polar bear\n(C) seal\n(D) cat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.3125, "loss": 3.8857388496398926 }, { "id": "identity_40333", "image": "FigureQA/images/41571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the category 'Peru' represented by the top or bottom half of the pie chart?" }, { "from": "gpt", "value": "The answer is Bottom" } ], "clip_score": 27.03125, "loss": 2.501206636428833 }, { "id": "identity_288077", "image": "DocVQA/images/rxvh0227_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the projected deficit in dollars for the University of Alabama?" }, { "from": "gpt", "value": "The answer is 61,000" } ], "clip_score": 26.0, "loss": 2.099485158920288 }, { "id": "identity_192213", "image": "VQA2.0/images/145444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the object hanging on the wall behind the monitor?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 24.28125, "loss": 3.5619990825653076 }, { "id": "identity_254410", "image": "VizWiz/images/VizWiz_train_00012755.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weight of the flour in grams?" }, { "from": "gpt", "value": "The answer is 1500 grams" } ], "clip_score": 25.5625, "loss": 1.539521336555481 }, { "id": "identity_277828", "image": "TextVQA/images/1abcf3116b521888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the rest of the text on the diploma?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.1484375, "loss": 4.060794830322266 }, { "id": "identity_323693", "image": "ChartQA/images/two_col_20814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many comorbidities were observed in COVID-19 deceased patients in Italy as of April 28, 2021, that were over 3?" }, { "from": "gpt", "value": "The answer is 67" } ], "clip_score": 26.03125, "loss": 2.520212411880493 }, { "id": "identity_29571", "image": "TQA/images/question_images/earth_poles_8057.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is the closest to the Equator?\nChoices:\n(A) Europe\n(B) South America\n(C) Oceania\n(D) North America" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.21875, "loss": 3.7981979846954346 }, { "id": "identity_156983", "image": "TQA/images/question_images/photosynthesis_1316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the diagram indicate the direction of water movement in the process?" }, { "from": "gpt", "value": "The answer is Arrows pointing upwards from roots to plant" } ], "clip_score": 27.296875, "loss": 2.90838360786438 }, { "id": "identity_253562", "image": "PMC-VQA/images/PMC6327415_Fig13_420938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the cell type visible in panel (c) and describe two key structural features that help in identifying this cell type" }, { "from": "gpt", "value": "The answer is Astrocytes; star-shaped with numerous processes, large round nucleus" } ], "clip_score": 30.1875, "loss": 3.5224387645721436 }, { "id": "identity_14566", "image": "VQA2.0/images/227828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where are they playing?" }, { "from": "gpt", "value": "The answer is ocean" } ], "clip_score": 20.328125, "loss": 3.162827253341675 }, { "id": "identity_271898", "image": "TextVQA/images/24b5125dfe480ff8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what is displayed on the building's sign?" }, { "from": "gpt", "value": "The answer is menu" } ], "clip_score": 20.890625, "loss": 3.328369617462158 }, { "id": "identity_125158", "image": "VQA2.0/images/74925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person preparing the food wearing gloves?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.765625, "loss": 3.4651999473571777 }, { "id": "identity_154249", "image": "AI2D/images/96.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction does the Earth rotate as indicated by the image?" }, { "from": "gpt", "value": "The answer is West to East or Counterclockwise" } ], "clip_score": 33.3125, "loss": 2.5451459884643555 }, { "id": "identity_204032", "image": "A-OKVQA/images/241728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a trash can visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.4375, "loss": 3.2458412647247314 }, { "id": "identity_108557", "image": "VQA2.0/images/492299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any item in the suitcase that is not footwear?" }, { "from": "gpt", "value": "The answer is Yes, a white garment" } ], "clip_score": 33.15625, "loss": 3.5485305786132812 }, { "id": "identity_299181", "image": "ScienceQA/images/19000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Wisconsin?\nChoices:\n(A) Minneapolis\n(B) Springfield\n(C) Milwaukee\n(D) Madison" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.703125, "loss": 3.7482638359069824 }, { "id": "identity_251652", "image": "GeoQA+/images/4866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,AB=6,以点A为圆心,3为半径的圆与边BC相切于点D,与AC,AB分别交于点E和点G,点F是优弧GE上一点,∠CDE=18°,若∠GFE的度数是48°,求∠BAC的度数。" }, { "from": "gpt", "value": "The answer is 108°" } ], "clip_score": 29.234375, "loss": 1.548973560333252 }, { "id": "identity_210665", "image": "PMC-VQA/images/PMC7198717_F1_162893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What lobe of the brain is the lesion located in?" }, { "from": "gpt", "value": "The answer is Frontal lobe" } ], "clip_score": 30.625, "loss": 2.1405210494995117 }, { "id": "identity_311758", "image": "TextVQA/images/546e27f604a7a342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last word on this?" }, { "from": "gpt", "value": "The answer is paradise" } ], "clip_score": 13.984375, "loss": 4.762145519256592 }, { "id": "identity_325542", "image": "IconQA/images/79902.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 25.59375, "loss": 2.2072486877441406 }, { "id": "identity_198876", "image": "PMC-VQA/images/PMC5019880_fig1_72290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging is shown in the image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.84375, "loss": 3.008729934692383 }, { "id": "identity_81256", "image": "UniGeo/images/2572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the rhombus ABCD, if the length of EF is 4, what is the length of side AD?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.125, "loss": 2.2845518589019775 }, { "id": "identity_272400", "image": "CLEVR-Math/images/CLEVR_train_040268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 7 yellow cubes, how many yellow cubes remain?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 18.53125, "loss": 2.657816171646118 }, { "id": "identity_55776", "image": "DVQA/images/bar_train_00178555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the tallest \"lap\" bar in the chart?" }, { "from": "gpt", "value": "The answer is Approximately 10^9" } ], "clip_score": 26.0625, "loss": 2.9314181804656982 }, { "id": "identity_245304", "image": "FigureQA/images/70111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest values of the Gray line?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 20.78125, "loss": 2.5141408443450928 }, { "id": "identity_126167", "image": "VQA2.0/images/110060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person wearing gloves?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.5546875, "loss": 3.3843331336975098 }, { "id": "identity_261716", "image": "TQA/images/abc_question_images/parts_worm_17347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the letter that comes between the crop and the clitellum?\nChoices:\n(A) E\n(B) L\n(C) A\n(D) T" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.421875, "loss": 4.203387260437012 }, { "id": "identity_224727", "image": "PMC-VQA/images/PMC7325026_Fig2_201874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features visible in the lateral view of the Munchkin cat's skeleton, and how do these features compare to a standard cat's skeleton?" }, { "from": "gpt", "value": "The answer is Shortened limbs, curved spine, normal-sized body and head" } ], "clip_score": 31.5, "loss": 3.1403703689575195 }, { "id": "identity_1233", "image": "CLEVR-Math/images/CLEVR_train_043123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract 1 balls. How many balls are left?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.078125, "loss": 2.492797374725342 }, { "id": "identity_47702", "image": "DVQA/images/bar_train_00028529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for bread in both stores combined?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.140625, "loss": 2.641439199447632 }, { "id": "identity_40393", "image": "FigureQA/images/39066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 34.6875, "loss": 3.771693706512451 }, { "id": "identity_97847", "image": "TabMWP/images/33763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the volleyball activity?" }, { "from": "gpt", "value": "The answer is 55 minutes" } ], "clip_score": 31.171875, "loss": 2.6783642768859863 }, { "id": "identity_159522", "image": "GeoQA+/images/1866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BCD?" }, { "from": "gpt", "value": "The answer is 20 degrees" } ], "clip_score": 32.3125, "loss": 2.087764024734497 }, { "id": "identity_333571", "image": "PMC-VQA/images/PMC6684521_F1_20545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this depict?\nChoices:\n(A) Microtubules in RNFF1 cell line,\n(B) Dysfunctional telomeres in MR39 cell line,\n(C) Signal intensities of individual chromosomes,\n(D) Spread of cells on 9th passage." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.421875, "loss": 4.694847583770752 }, { "id": "identity_261987", "image": "TabMWP/images/37427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Emily requires to purchase both a math book and a cat calendar?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 36.0625, "loss": 2.4792141914367676 }, { "id": "identity_315817", "image": "DVQA/images/bar_train_00084408.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart shown in a percentage scale?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 26.984375, "loss": 3.608560085296631 }, { "id": "identity_135115", "image": "DVQA/images/bar_train_00108744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people who preferred marine objects in the pistol category out of the total number of people?" }, { "from": "gpt", "value": "The answer is 60%" } ], "clip_score": 34.6875, "loss": 1.8171939849853516 }, { "id": "identity_199912", "image": "TextVQA/images/28a1f2139ae612da.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many application icons are displayed on the phone's screen?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 21.234375, "loss": 2.167998790740967 }, { "id": "identity_206788", "image": "PMC-VQA/images/PMC7688103_pone.0241916.g001_304772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sides does the polygon have?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 21.65625, "loss": 2.790966510772705 }, { "id": "identity_273631", "image": "TextVQA/images/7bf2efb3f6d4c96a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the quantity of cups?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 18.40625, "loss": 1.5365151166915894 }, { "id": "identity_596", "image": "A-OKVQA/images/355786.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is flying through the air?\nChoices:\n(A) stuffed animal\n(B) livestock\n(C) bear\n(D) chicken" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.59375, "loss": 4.000203609466553 }, { "id": "identity_155419", "image": "ScienceQA/images/17355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles of water are there in total in both Belle's and Kate's lunch?" }, { "from": "gpt", "value": "The answer is 7 bottles" } ], "clip_score": 19.859375, "loss": 2.3742258548736572 }, { "id": "identity_130108", "image": "TextVQA/images/0b71ba450dc201c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the four-letter word inscribed at the bottom of the left seal?" }, { "from": "gpt", "value": "The answer is DOMI" } ], "clip_score": 18.328125, "loss": 4.092040538787842 }, { "id": "identity_137563", "image": "DocVQA/images/htfg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission used for the fax?" }, { "from": "gpt", "value": "The answer is IFC" } ], "clip_score": 24.40625, "loss": 5.0890021324157715 }, { "id": "identity_88857", "image": "Geometry3K/images/1805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If PR is parallel to KL, and the length of KP is twice the length of PM, what is the ratio of the area of triangle KPN to the area of triangle LNM?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.09375, "loss": 2.6252634525299072 }, { "id": "identity_62425", "image": "DocVQA/images/yrcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is Executive Vice President" } ], "clip_score": 21.25, "loss": 3.736257791519165 }, { "id": "identity_104198", "image": "A-OKVQA/images/207491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cars are parked on the right side of the street?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 18.78125, "loss": 4.560756683349609 }, { "id": "identity_192380", "image": "PMC-VQA/images/PMC7509077_f0005_259228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are labeled with the letter 'D'?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.625, "loss": 2.447070837020874 }, { "id": "identity_37235", "image": "DocVQA/images/qqjw0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total maximum cost?" }, { "from": "gpt", "value": "The answer is $738,217" } ], "clip_score": 19.0625, "loss": 2.3988687992095947 }, { "id": "identity_181000", "image": "IconQA/images/5834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shape is not colored?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 30.125, "loss": 1.787851095199585 }, { "id": "identity_5793", "image": "ScienceQA/images/12598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which i in column 3?\nChoices:\n(A) the fast-food restaurant\n(B) the theater\n(C) the grocery store\n(D) the pond" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.046875, "loss": 3.8517165184020996 }, { "id": "identity_302261", "image": "PMC-VQA/images/PMC2989308_F1_79157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which veins are indicated in the caption?\nChoices:\n(A) Femoral vein and popliteal vein \n(B) Posterior tibial vein and peroneal vein \n(C) External iliac vein and common femoral vein \n(D) Saphenous vein and deep femoral vein" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.53125, "loss": 4.241753578186035 }, { "id": "identity_278999", "image": "VQA2.0/images/110060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of individuals depicted?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 12.0859375, "loss": 2.3150527477264404 }, { "id": "identity_90597", "image": "Geometry3K/images/203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of the angle marked with a pink square on the bottom left of the image" }, { "from": "gpt", "value": "The answer is The angle measures 63 degrees" } ], "clip_score": 38.03125, "loss": 3.086211919784546 }, { "id": "identity_178437", "image": "IconQA/images/33194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of squares with dots to the total number of squares?" }, { "from": "gpt", "value": "The answer is 1:4" } ], "clip_score": 30.3125, "loss": 1.6261361837387085 }, { "id": "identity_213376", "image": "A-OKVQA/images/537925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What two words are written below the motel name on the sign?" }, { "from": "gpt", "value": "The answer is No Vacancy" } ], "clip_score": 34.78125, "loss": 2.659228563308716 }, { "id": "identity_129092", "image": "TextVQA/images/da7af4d7278cb999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the model number of the laptop displayed?" }, { "from": "gpt", "value": "The answer is VJS131" } ], "clip_score": 19.453125, "loss": 3.497723340988159 }, { "id": "identity_240084", "image": "A-OKVQA/images/39456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the dessert in the image and list three common ingredients used to make it" }, { "from": "gpt", "value": "The answer is Flour, eggs, milk" } ], "clip_score": 21.453125, "loss": 3.7943615913391113 }, { "id": "identity_31267", "image": "DocVQA/images/rlww0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the classification of the project P2(8)?" }, { "from": "gpt", "value": "The answer is Bone" } ], "clip_score": 22.375, "loss": 4.352122783660889 }, { "id": "identity_178570", "image": "TabMWP/images/851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many customers rented a movie exactly 4 times?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 27.375, "loss": 1.8801676034927368 }, { "id": "identity_319127", "image": "TextVQA/images/42a7f9fb024854b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the word after this?" }, { "from": "gpt", "value": "The answer is landing" } ], "clip_score": 16.59375, "loss": 4.088159561157227 }, { "id": "identity_185471", "image": "VQA2.0/images/162295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of meat is used as a topping on this pizza?" }, { "from": "gpt", "value": "The answer is Ground meat" } ], "clip_score": 28.875, "loss": 3.2695047855377197 }, { "id": "identity_2195", "image": "TextVQA/images/2ae2114a4a2f116b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand is on the sign?" }, { "from": "gpt", "value": "The answer is guess" } ], "clip_score": 22.203125, "loss": 3.108985185623169 }, { "id": "identity_168486", "image": "IconQA/images/21689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple circles are there?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.0625, "loss": 2.283411979675293 }, { "id": "identity_336465", "image": "ScienceQA/images/5057.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Consider the magnetic force between the magnets in each set. Which of the following statements is accurate?\nChoices:\n(A) The magnitude of the magnetic force is greater in the second set.\n(B) The magnitude of the magnetic force is the same in both sets.\n(C) The magnitude of the magnetic force is greater in the first set." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.828125, "loss": 3.404815196990967 }, { "id": "identity_117700", "image": "VizWiz/images/VizWiz_train_00015927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the bottle in liters?" }, { "from": "gpt", "value": "The answer is 1.75 liters" } ], "clip_score": 15.1796875, "loss": 1.5766406059265137 }, { "id": "identity_46817", "image": "FigureQA/images/88989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by the Saddle Brown line?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 23.921875, "loss": 2.4271657466888428 }, { "id": "identity_288390", "image": "PlotQA/images/139947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the gap between the highest and lowest percentage of workers in the industry?" }, { "from": "gpt", "value": "The answer is 12.2999992370605" } ], "clip_score": 27.1875, "loss": 1.9937446117401123 }, { "id": "identity_248213", "image": "A-OKVQA/images/237919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the responsibilities of the woman in the chair during a tennis match, and how do they differ from those of a line judge?" }, { "from": "gpt", "value": "The answer is Chair umpire oversees the match; line judge calls lines" } ], "clip_score": 26.265625, "loss": 3.2061266899108887 }, { "id": "identity_189807", "image": "A-OKVQA/images/197690.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the jersey of the player standing at first base?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.46875, "loss": 2.026134729385376 }, { "id": "identity_246608", "image": "DocVQA/images/pkmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount authorized for the employee named FRANCES ELLEN BROCK, and what is the period for which this amount is authorized?" }, { "from": "gpt", "value": "The answer is $485.00 for the period 8/1 to 8/31/66" } ], "clip_score": 24.890625, "loss": 1.9905972480773926 }, { "id": "identity_338048", "image": "PMC-VQA/images/PMC7752459_ccr33219-fig-0001_324389.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the imaging technique used to detect the abnormal signal at the bilateral pulvinar areas?\nChoices:\n(A) CT scan\n(B) X-ray\n(C) PET scan\n(D) MRI scan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.21875, "loss": 3.7639739513397217 }, { "id": "identity_190867", "image": "PMC-VQA/images/PMC6607307_gch2201800040-fig-0003_3410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level indicated in the SEM image?" }, { "from": "gpt", "value": "The answer is The magnification level is not explicitly stated" } ], "clip_score": 29.28125, "loss": 3.152536153793335 }, { "id": "identity_189229", "image": "A-OKVQA/images/161725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of shoes is the man wearing?" }, { "from": "gpt", "value": "The answer is White sneakers" } ], "clip_score": 19.921875, "loss": 2.356442928314209 }, { "id": "identity_284813", "image": "TextVQA/images/860bcefc75723ccf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the nature of the attack?" }, { "from": "gpt", "value": "The answer is crab monsters " } ], "clip_score": 18.71875, "loss": 3.478121757507324 }, { "id": "identity_238185", "image": "Geometry3K/images/246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of y" }, { "from": "gpt", "value": "The answer is -80" } ], "clip_score": 27.78125, "loss": 3.4066574573516846 }, { "id": "identity_264702", "image": "VQA2.0/images/467177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you specify the color of the office chair?" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 28.109375, "loss": 3.420194149017334 }, { "id": "identity_175076", "image": "IconQA/images/34307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Dale had left 30 minutes earlier, what time would it have been?" }, { "from": "gpt", "value": "The answer is 2:30 PM" } ], "clip_score": 24.28125, "loss": 1.5446020364761353 }, { "id": "identity_39326", "image": "DVQA/images/bar_train_00040366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the door pattern for the 'victim' category?" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 31.40625, "loss": 2.264979600906372 }, { "id": "identity_109561", "image": "VQA-AS/images/3817.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sources of light are depicted in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.625, "loss": 4.541760444641113 }, { "id": "identity_325429", "image": "GeoQA+/images/3868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,其外角∠BAE的平分线交⊙O于点D,点A为弧CD的中点.若∠ABC=28°,则∠ACB的大小为多少?\nChoices:\n(A) 84°\n(B) 85°\n(C) 86°\n(D) 88°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.609375, "loss": 2.268162250518799 }, { "id": "identity_87456", "image": "GeoQA+/images/1112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若AB=10,则⊙O的半径是多少?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.71875, "loss": 2.296412706375122 }, { "id": "identity_34209", "image": "CLEVR-Math/images/CLEVR_train_049171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 8 tiny yellow objects. How many tiny yellow objects are left?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 22.28125, "loss": 2.6291229724884033 }, { "id": "identity_84634", "image": "UniGeo/images/1520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the perimeter of triangle ABC is 24 cm, what is the length of side AB?" }, { "from": "gpt", "value": "The answer is 8 cm" } ], "clip_score": 36.28125, "loss": 1.7639914751052856 }, { "id": "identity_194922", "image": "PMC-VQA/images/PMC6712706_Fig2_29631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many toes are visible in the radiograph?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 34.71875, "loss": 4.065085411071777 }, { "id": "identity_97380", "image": "TabMWP/images/32886.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of trips to the mall made by the people surveyed last month?" }, { "from": "gpt", "value": "The answer is 70 trips" } ], "clip_score": 35.15625, "loss": 2.0860090255737305 }, { "id": "identity_120691", "image": "VQA2.0/images/102168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the machine intended for indoor or outdoor use?" }, { "from": "gpt", "value": "The answer is Both indoor and outdoor use" } ], "clip_score": 16.296875, "loss": 2.5840699672698975 }, { "id": "identity_220853", "image": "PMC-VQA/images/PMC5356317_Fig6_141692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color predominantly occupies the top half of the image?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 22.046875, "loss": 3.840007781982422 }, { "id": "identity_43415", "image": "PlotQA/images/54202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of fertilizer production for Malawi in 2007?" }, { "from": "gpt", "value": "The answer is Approximately 40000%" } ], "clip_score": 27.40625, "loss": 2.374523639678955 }, { "id": "identity_273527", "image": "VizWiz/images/VizWiz_train_00013907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the name of this software title?" }, { "from": "gpt", "value": "The answer is life" } ], "clip_score": 23.578125, "loss": 3.419605255126953 }, { "id": "identity_31803", "image": "DocVQA/images/mkln0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration as per the report?" }, { "from": "gpt", "value": "The answer is 05 ' 18" } ], "clip_score": 21.203125, "loss": 3.432265281677246 }, { "id": "identity_297713", "image": "A-OKVQA/images/279083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action does the pitcher prompt the batter to take in this scenario?\nChoices:\n(A) quit\n(B) go home\n(C) wait\n(D) swing" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.375, "loss": 3.7540907859802246 }, { "id": "identity_262503", "image": "TabMWP/images/15393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of states visited by each student in Tanvi's class as recorded by the students?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 33.0, "loss": 3.6578235626220703 }, { "id": "identity_209628", "image": "A-OKVQA/images/252086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the website URL advertised on the banner in the background?" }, { "from": "gpt", "value": "The answer is k12.com/play" } ], "clip_score": 12.59375, "loss": 2.709998846054077 }, { "id": "identity_230167", "image": "VQA-RAD/images/synpic59126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other anatomical structures that indicate situs inversus in this X-ray image?" }, { "from": "gpt", "value": "The answer is Yes, there are other structures indicating situs inversus" } ], "clip_score": 28.28125, "loss": 2.004164218902588 }, { "id": "identity_300655", "image": "IconQA/images/85511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares are here?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.578125, "loss": 2.2184743881225586 }, { "id": "identity_172882", "image": "TabMWP/images/2491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest score achieved by the students?" }, { "from": "gpt", "value": "The answer is 92" } ], "clip_score": 30.015625, "loss": 2.8239893913269043 }, { "id": "identity_74504", "image": "TQA/images/abc_question_images/human_system_digestive_13655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label X represent in the diagram?" }, { "from": "gpt", "value": "The answer is Gallbladder" } ], "clip_score": 31.171875, "loss": 1.4206804037094116 }, { "id": "identity_108389", "image": "Super-CLEVR/images/superCLEVR_new_001932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of airplanes in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 15.5390625, "loss": 2.4713966846466064 }, { "id": "identity_299814", "image": "A-OKVQA/images/557174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of animal resides in this house? \nOptions:\n(A) reptile\n(B) fish\n(C) cat\n(D) dog" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 7.28515625, "loss": 3.4120593070983887 }, { "id": "identity_35905", "image": "VQA2.0/images/107787.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more black or white umbrellas?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 22.3125, "loss": 2.781576156616211 }, { "id": "identity_307819", "image": "FigureQA/images/74270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color less than Deep Pink?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.96875, "loss": 3.8623032569885254 }, { "id": "identity_92678", "image": "TabMWP/images/788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern continues, what would be the y value when x is 18?" }, { "from": "gpt", "value": "The answer is -1" } ], "clip_score": 29.15625, "loss": 3.2932188510894775 }, { "id": "identity_224137", "image": "DocVQA/images/gtkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contract number and the period ending date given in the document?" }, { "from": "gpt", "value": "The answer is DA-49-007-MD-784, 15 September, 1960" } ], "clip_score": 24.25, "loss": 2.347531318664551 }, { "id": "identity_329848", "image": "AI2D/images/3867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is D important for a leaf's function?\nChoices:\n(A) Biology An aggregation of morphologically similar cells and associated intercellular matter acting together to perform one or more specific functions in the body. There are four basic types of tissue: muscle, nerve, epidermal, and connective.\n(B) The food-conducting tissue of vascular plants, consisting of sieve tubes, fibers, parenchyma, and sclereids. Also called bast.\n(C) Botany The layer of cutin covering the epidermis of the aerial parts of plants.\n(D) The outer, protective, nonvascular layer of the skin of vertebrates, covering the dermis." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.765625, "loss": 3.1580772399902344 }, { "id": "identity_26119", "image": "DVQA/images/bar_train_00100346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the bottom in each group?" }, { "from": "gpt", "value": "The answer is color" } ], "clip_score": 21.84375, "loss": 3.432650089263916 }, { "id": "identity_7027", "image": "AI2D/images/1914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web which is the organism that does not eat other animals?\nChoices:\n(A) shrew\n(B) grasshopper, rabbit\n(C) hawk\n(D) owl" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.9375, "loss": 3.9371533393859863 }, { "id": "identity_189214", "image": "TextVQA/images/ed7e1688e00a4f45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the issue number of this book?" }, { "from": "gpt", "value": "The answer is No 11 T.1" } ], "clip_score": 14.390625, "loss": 3.6924631595611572 }, { "id": "identity_39659", "image": "FigureQA/images/37576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest section in the pie chart?" }, { "from": "gpt", "value": "The answer is Light Seafoam" } ], "clip_score": 37.53125, "loss": 2.4256811141967773 }, { "id": "identity_336316", "image": "DVQA/images/bar_train_00051192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total people preferred this object across all the categories?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 27.71875, "loss": 2.105384349822998 }, { "id": "identity_240065", "image": "VQA-RAD/images/synpic33422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of pulmonary edema or pleural effusion in the image?" }, { "from": "gpt", "value": "The answer is No signs of pulmonary edema or pleural effusion" } ], "clip_score": 32.125, "loss": 1.7126145362854004 }, { "id": "identity_16958", "image": "TabMWP/images/24189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Emmy kept a driving log to see how many miles she drove each day. What is the range of the numbers?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 32.6875, "loss": 3.1966967582702637 }, { "id": "identity_43806", "image": "ChartQA/images/two_col_100701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sales volume in million euros for the year 2014?" }, { "from": "gpt", "value": "The answer is 16,259" } ], "clip_score": 23.28125, "loss": 2.346649169921875 }, { "id": "identity_297625", "image": "VizWiz/images/VizWiz_train_00000701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide information about this color?" }, { "from": "gpt", "value": "The answer is right pink black middle black left pink black" } ], "clip_score": 27.5, "loss": 3.407630681991577 }, { "id": "identity_302856", "image": "PMC-VQA/images/PMC7393149_Fig1_222964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the green color in the three-dimensional view of this part of the eye?\nChoices:\n(A) Hyaloid vessels\n(B) Retinal tissues\n(C) Choroidal tissues\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.3125, "loss": 4.549305438995361 }, { "id": "identity_168438", "image": "TabMWP/images/31486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a customer purchases 1 pound of pushpins and 1 pound of teeth hooks, what is the difference in cost between the two items?" }, { "from": "gpt", "value": "The answer is $2" } ], "clip_score": 33.65625, "loss": 2.785661458969116 }, { "id": "identity_107685", "image": "TextVQA/images/00bdc07a03221bd8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the liquid in the bottle?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 11.78125, "loss": 4.031358242034912 }, { "id": "identity_120038", "image": "TextVQA/images/abbc4bd5a7b96200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phrase is used to describe the Kadette Jewel radio in the advertisement?" }, { "from": "gpt", "value": "The answer is A NEW TYPE OF RADIO... INTRODUCED IN A NEW WAY" } ], "clip_score": 42.0625, "loss": 1.1703649759292603 }, { "id": "identity_243010", "image": "VizWiz/images/VizWiz_train_00017775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main theme of the text in the image, and how does it relate to the visual elements present in the picture?" }, { "from": "gpt", "value": "The answer is The power of Allah over natural phenomena" } ], "clip_score": 29.6875, "loss": 4.091137886047363 }, { "id": "identity_320071", "image": "PMC-VQA/images/PMC7849607_F2_357213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the black arrows? \nChoices:\n(A) Gyral atrophy \n(B) Gyral hyperplasia \n(C) Gyral edema \n(D) Gyral ossification" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.5625, "loss": 4.49957275390625 }, { "id": "identity_311883", "image": "VQA2.0/images/122709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is taking place?" }, { "from": "gpt", "value": "The answer is baseball" } ], "clip_score": 27.1875, "loss": 3.654120683670044 }, { "id": "identity_278838", "image": "A-OKVQA/images/69856.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of the state shown in the image? \n(A) Manhattan \n(B) Buffalo \n(C) Albany \n(D) Poughkeepsie" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.140625, "loss": 4.011298656463623 }, { "id": "identity_136412", "image": "DocVQA/images/ffcn0226_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What position did Christopher Jahn hold at the National Association of Chemical Distributors?" }, { "from": "gpt", "value": "The answer is President" } ], "clip_score": 26.796875, "loss": 2.676086902618408 }, { "id": "identity_313633", "image": "TabMWP/images/5122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The houses in this town get their water from a local reservoir. Diane and some engineers monitor the water level for safety and health reasons. The change in water level over two months is shown in the table. During which month did the water level change more?\nChoices:\n(A) April\n(B) May" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.53125, "loss": 3.0087122917175293 }, { "id": "identity_329232", "image": "PMC-VQA/images/PMC5387068_F3_151495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image?\nChoices:\n(A) A slice of a brain with metastasis\n(B) An enhanced green fluorescent protein (eGFP) labeled lesion\n(C) A slice of a brain after treatment with bevacizumab and L1-10AIB\n(D) A slice of a normal brain" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.09375, "loss": 3.9118752479553223 }, { "id": "identity_221988", "image": "VQA2.0/images/174615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of precipitation is evident in the image?" }, { "from": "gpt", "value": "The answer is Snow" } ], "clip_score": 19.90625, "loss": 3.7791881561279297 }, { "id": "identity_159176", "image": "GeoQA+/images/4015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the right triangle ABC?" }, { "from": "gpt", "value": "The answer is 2 + 4 + sqrt(20)" } ], "clip_score": 30.9375, "loss": 1.6781398057937622 }, { "id": "identity_165514", "image": "UniGeo/images/2437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment AC?" }, { "from": "gpt", "value": "The answer is 115.48 meters" } ], "clip_score": 31.328125, "loss": 2.6882646083831787 }, { "id": "identity_126078", "image": "A-OKVQA/images/248818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the vehicle?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 13.484375, "loss": 2.085644483566284 }, { "id": "identity_67696", "image": "AI2D/images/3614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the tube that connects the throat to the stomach" }, { "from": "gpt", "value": "The answer is Esophagus" } ], "clip_score": 30.96875, "loss": 1.5826491117477417 }, { "id": "identity_232783", "image": "AI2D/images/1403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the organ system shown in the figure and list three major functions it performs" }, { "from": "gpt", "value": "The answer is Respiratory system; gas exchange, pH regulation, protection" } ], "clip_score": 32.40625, "loss": 2.741412401199341 }, { "id": "identity_88897", "image": "Geometry3K/images/474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BYP?" }, { "from": "gpt", "value": "The answer is 232.5 degrees" } ], "clip_score": 29.234375, "loss": 2.260024070739746 }, { "id": "identity_28960", "image": "TextVQA/images/d7ac482188ca280b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what merchandise section is displayed on the left?" }, { "from": "gpt", "value": "The answer is plumbing" } ], "clip_score": 24.046875, "loss": 2.7838189601898193 }, { "id": "identity_168083", "image": "IconQA/images/60365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vans in the image?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 27.84375, "loss": 2.0859177112579346 }, { "id": "identity_120339", "image": "VQA2.0/images/47347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the pedestrians crossing the street?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.875, "loss": 2.9681947231292725 }, { "id": "identity_248100", "image": "A-OKVQA/images/403862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three distinct features of the train that make it resemble the character Thomas, and explain how each feature contributes to this resemblance" }, { "from": "gpt", "value": "The answer is Face, color scheme, shape and design" } ], "clip_score": 28.25, "loss": 4.040406703948975 }, { "id": "identity_228498", "image": "ScienceQA/images/17745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In this experiment, if the total number of plots is 20 and the number of plots with fires is 12, how many plots were part of the control group, and what percentage of the total plots does the control group represent?" }, { "from": "gpt", "value": "The answer is 8 plots, 40%" } ], "clip_score": 13.953125, "loss": 2.267049551010132 }, { "id": "identity_127606", "image": "A-OKVQA/images/232963.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the interior wall of the room?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 14.3125, "loss": 3.4405510425567627 }, { "id": "identity_35489", "image": "A-OKVQA/images/166424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are they doing?\nChoices:\n(A) resting\n(B) waiting ski\n(C) cleaning up\n(D) eating dinner" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 4.110487937927246 }, { "id": "identity_336081", "image": "PlotQA/images/133623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the heading of this chart?" }, { "from": "gpt", "value": "The answer is Percentage of ores and metals imported from different countries" } ], "clip_score": 30.25, "loss": 1.1206879615783691 }, { "id": "identity_189142", "image": "PMC-VQA/images/PMC8796580_Fig2_179495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the imaging technique used in this image invasive or non-invasive?" }, { "from": "gpt", "value": "The answer is Non-invasive" } ], "clip_score": 26.859375, "loss": 2.1691508293151855 }, { "id": "identity_220520", "image": "A-OKVQA/images/514904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the napkin wrapped around the food item in the person's right hand?" }, { "from": "gpt", "value": "The answer is A blue flower" } ], "clip_score": 16.65625, "loss": 4.252645015716553 }, { "id": "identity_147041", "image": "DocVQA/images/zqvw0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a confidentiality disclosure agreement mentioned in the document?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.0625, "loss": 3.292008638381958 }, { "id": "identity_118646", "image": "TextVQA/images/021e8e65743a4599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color scheme of the flags shown in the image?" }, { "from": "gpt", "value": "The answer is Red, white, and blue" } ], "clip_score": 19.265625, "loss": 2.14924693107605 }, { "id": "identity_194585", "image": "A-OKVQA/images/99681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the machine located at the left foreground of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.171875, "loss": 3.574111223220825 }, { "id": "identity_288816", "image": "VQA2.0/images/580906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the traffic light?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 19.1875, "loss": 2.9290030002593994 }, { "id": "identity_138883", "image": "FigureQA/images/963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of the bars labeled \"Coral\" and \"Tomato\" combined?" }, { "from": "gpt", "value": "The answer is 120" } ], "clip_score": 21.375, "loss": 1.9674911499023438 }, { "id": "identity_58749", "image": "FigureQA/images/90488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which line has the steepest slope between x-axis labels 20 and 40?" }, { "from": "gpt", "value": "The answer is Medium Seafoam" } ], "clip_score": 28.78125, "loss": 2.0257298946380615 }, { "id": "identity_209753", "image": "PMC-VQA/images/PMC6798454_Fig2_52632.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any cells in the image that appear to be in the process of division?" }, { "from": "gpt", "value": "The answer is No, there are no dividing cells" } ], "clip_score": 24.828125, "loss": 2.3076958656311035 }, { "id": "identity_329564", "image": "TQA/images/question_images/optics_reflection_9189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many angles are involved when light interacts with a mirror?\nChoices:\n(A) 2\n(B) 4\n(C) 3\n(D) 1" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.234375, "loss": 3.592383861541748 }, { "id": "identity_129980", "image": "PMC-VQA/images/PMC5402248_fig2_156006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of hardware or surgical implants in the radiograph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.203125, "loss": 2.7808451652526855 }, { "id": "identity_144490", "image": "MapQA/images/map_4335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a percentage of farms that fall within the 19.7%-28.3% range?" }, { "from": "gpt", "value": "The answer is Six states" } ], "clip_score": 41.21875, "loss": 4.27927827835083 }, { "id": "identity_251955", "image": "TQA/images/question_images/aquifers_6517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the depth range in meters where drinking water is injected, and what geological layer prevents any flow between the lower greensand and chalk aquifers?" }, { "from": "gpt", "value": "The answer is 400m to 450m, Gault Clay" } ], "clip_score": 32.5, "loss": 1.7596969604492188 }, { "id": "identity_86027", "image": "GeoQA+/images/2328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在⊙O中,若∠ABC=50°,则∠AEC的度数为多少?" }, { "from": "gpt", "value": "The answer is 75°" } ], "clip_score": 31.8125, "loss": 2.0468621253967285 }, { "id": "identity_99688", "image": "TabMWP/images/21411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If 50 new students enroll at Oceanside Elementary, how many students will the school have?" }, { "from": "gpt", "value": "The answer is 479" } ], "clip_score": 27.703125, "loss": 2.284543514251709 }, { "id": "identity_195235", "image": "PMC-VQA/images/PMC7213657_FIG3_166476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the catheter in the image?" }, { "from": "gpt", "value": "The answer is To inject contrast dye for vessel visualization" } ], "clip_score": 29.28125, "loss": 3.5626769065856934 }, { "id": "identity_48563", "image": "ChartQA/images/04930672006502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined share of suicide deaths from pesticide poisoning for the regions of Americas and Africa?" }, { "from": "gpt", "value": "The answer is 12.3%" } ], "clip_score": 42.25, "loss": 1.8471816778182983 }, { "id": "identity_238433", "image": "UniGeo/images/1899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O直径,OB=6,弦CD=10,若弦心距OP的长为√{11},求弦CD的中点到圆心O的距离。" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.765625, "loss": 2.802243947982788 }, { "id": "identity_215469", "image": "A-OKVQA/images/395975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ceiling lights are there in the room?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 18.3125, "loss": 2.3774373531341553 }, { "id": "identity_121466", "image": "PMC-VQA/images/PMC8029940_f10_412209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure indicated by the white arrow?" }, { "from": "gpt", "value": "The answer is Uterus" } ], "clip_score": 27.3125, "loss": 2.590960741043091 }, { "id": "identity_245879", "image": "PMC-VQA/images/PMC3820302_fig12_241670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible implications of the research conducted by the National Institutes of Health/Department of Health and Human Services based on the microscopic image provided?" }, { "from": "gpt", "value": "The answer is Advancing medical knowledge and developing treatments for diseases" } ], "clip_score": 21.484375, "loss": 3.025981903076172 }, { "id": "identity_220769", "image": "A-OKVQA/images/209859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the side of the green trains?" }, { "from": "gpt", "value": "The answer is FRET" } ], "clip_score": 26.375, "loss": 2.919372797012329 }, { "id": "identity_30270", "image": "A-OKVQA/images/329139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the bananas intended for?\nChoices:\n(A) planting\n(B) selling\n(C) eating\n(D) disposal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.015625, "loss": 4.417684078216553 }, { "id": "identity_251991", "image": "ChartQA/images/two_col_81371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total decrease in beer sales from the peak year to the year with the lowest sales?" }, { "from": "gpt", "value": "The answer is 1.61 million hectoliters" } ], "clip_score": 27.9375, "loss": 2.0198559761047363 }, { "id": "identity_114233", "image": "TextVQA/images/b5cce7b718870ee6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the device in the water?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.46875, "loss": 3.353306293487549 }, { "id": "identity_11458", "image": "UniGeo/images/3362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,将含有30°角的直角三角板另一个锐角顶点放在圆心O上,斜边和一直角边分别与⊙O相交于A、B两点,P是优弧AB上任意一点(与A、B不重合),则∠APB=()\nChoices:\n(A) 15°\n(B) 30°\n(C) 60°\n(D) 30°或150°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.65625, "loss": 2.6817104816436768 }, { "id": "identity_195447", "image": "A-OKVQA/images/133071.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bread is the avocado mixture served on?" }, { "from": "gpt", "value": "The answer is Multigrain or whole grain bread" } ], "clip_score": 24.328125, "loss": 2.279203176498413 }, { "id": "identity_149627", "image": "ChartQA/images/2522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage between those aged 50-64 who believe marijuana should be legal for medical and recreational use and those who believe it should be legal for medical use only?" }, { "from": "gpt", "value": "The answer is 17%" } ], "clip_score": 32.96875, "loss": 2.0509276390075684 }, { "id": "identity_285373", "image": "VQA2.0/images/560020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the tree branch lying horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.03125, "loss": 4.368089199066162 }, { "id": "identity_169973", "image": "IconQA/images/75605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many edges does this shape have?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 22.0, "loss": 1.9611570835113525 }, { "id": "identity_141686", "image": "DVQA/images/bar_train_00148874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the preference for \"plug\" in the \"genius\" category more than double the preference for \"evil\" in the \"change\" category?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.78125, "loss": 3.0978379249572754 }, { "id": "identity_253953", "image": "TabMWP/images/37481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average rate of change in the number of leftover loaves from Monday to Friday?" }, { "from": "gpt", "value": "The answer is -1.2" } ], "clip_score": 42.84375, "loss": 2.8104124069213867 }, { "id": "identity_81101", "image": "GeoQA+/images/4026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, if ∠ABC = 60° and AC = 6, what is the length of diagonal BD?" }, { "from": "gpt", "value": "The answer is 6√3" } ], "clip_score": 35.8125, "loss": 1.6567883491516113 }, { "id": "identity_280814", "image": "TextVQA/images/b8f013638e1a005b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number displayed on the umpire's sleeve?" }, { "from": "gpt", "value": "The answer is 74" } ], "clip_score": 22.703125, "loss": 2.6705925464630127 }, { "id": "identity_269798", "image": "DVQA/images/bar_train_00180513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of a drink today less than the value of silver by weight?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.84375, "loss": 3.444525957107544 }, { "id": "identity_277309", "image": "MapQA/images/map_9890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any missing data on the map?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.65625, "loss": 2.751674175262451 }, { "id": "identity_193389", "image": "TextVQA/images/936bfd55586e7dee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the genre of the book as indicated on the cover?" }, { "from": "gpt", "value": "The answer is SF (Science Fiction)" } ], "clip_score": 27.09375, "loss": 2.2010393142700195 }, { "id": "identity_139038", "image": "DocVQA/images/pkmf0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the job elimination program at Winston-Salem voluntary?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.21875, "loss": 3.1229469776153564 }, { "id": "identity_269474", "image": "PMC-VQA/images/PMC4711110_Fig1_461730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the imaging views that were obtained?\nChoices:\n(A) Axial and Sagittal\n(B) Axial and Coronal\n(C) Coronal and Sagittal\n(D) Axial, Coronal and Sagittal" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.828125, "loss": 2.993170738220215 }, { "id": "identity_8190", "image": "ChartQA/images/two_col_24068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of Twitter users in South Korea between 2014 and 2016? (In Millions)" }, { "from": "gpt", "value": "The answer is 4.9" } ], "clip_score": 29.453125, "loss": 1.8892476558685303 }, { "id": "identity_56309", "image": "FigureQA/images/25182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle of the sector representing Blue Violet?" }, { "from": "gpt", "value": "The answer is 216 degrees" } ], "clip_score": 28.796875, "loss": 2.528977632522583 }, { "id": "identity_309334", "image": "TQA/images/abc_question_images/human_system_nervous_16167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the motor cortex?\nChoices:\n(A) R\n(B) N\n(C) H\n(D) V" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.578125, "loss": 4.205824851989746 }, { "id": "identity_111445", "image": "Super-CLEVR/images/superCLEVR_new_000572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is painted green and white?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 20.71875, "loss": 3.4045231342315674 }, { "id": "identity_218110", "image": "A-OKVQA/images/12145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the license plate number of the car?" }, { "from": "gpt", "value": "The answer is CT 96LZ27" } ], "clip_score": 18.578125, "loss": 3.140723943710327 }, { "id": "identity_134", "image": "FigureQA/images/7786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.53125, "loss": 4.10214900970459 }, { "id": "identity_137511", "image": "ChartQA/images/7337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Americans consider the U.S. trade deficit with China as a very serious problem?" }, { "from": "gpt", "value": "The answer is 44%" } ], "clip_score": 25.25, "loss": 1.733619213104248 }, { "id": "identity_70390", "image": "TQA/images/abc_question_images/human_system_eye_16135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'X' indicate in the diagram?" }, { "from": "gpt", "value": "The answer is Lens" } ], "clip_score": 27.3125, "loss": 2.986137628555298 }, { "id": "identity_122940", "image": "A-OKVQA/images/545163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the speed limit for vehicles on this road?" }, { "from": "gpt", "value": "The answer is 10 km/h" } ], "clip_score": 21.609375, "loss": 1.6652292013168335 }, { "id": "identity_226499", "image": "Geometry3K/images/1912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of side \\( QS \\) in \\(\\triangle QRS\\) if \\(\\triangle QRS \\sim \\triangle QTP\\) and the perimeter of \\(\\triangle QRS\\) is 14" }, { "from": "gpt", "value": "The answer is \\(\\frac{11}{3}\\)" } ], "clip_score": 36.59375, "loss": 2.2231879234313965 }, { "id": "identity_111473", "image": "PMC-VQA/images/PMC4048546_f0005_295508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the MRI image show any signs of atrophy?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.15625, "loss": 2.7889835834503174 }, { "id": "identity_240334", "image": "TQA/images/question_images/nuclear_energy_8121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the process by which energy is generated and transmitted from the reactor vessel to the city, including the roles of the pressurizer, steam generator, turbine, and generator" }, { "from": "gpt", "value": "The answer is Reactor vessel, pressurizer, steam generator, turbine, generator" } ], "clip_score": 32.09375, "loss": 1.8473228216171265 }, { "id": "identity_210253", "image": "TextVQA/images/3d1380bcf7b40e57.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the architectural style of the new markets being constructed?" }, { "from": "gpt", "value": "The answer is Mid-century modern" } ], "clip_score": 25.953125, "loss": 2.803731679916382 }, { "id": "identity_250909", "image": "PMC-VQA/images/PMC7781174_Fig1_334095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential clinical implications of the areas indicated by the arrows in the images, considering they show abnormal thymus hyperplasia?" }, { "from": "gpt", "value": "The answer is Potential autoimmune diseases, infections, or neoplastic processes" } ], "clip_score": 28.046875, "loss": 2.409972667694092 }, { "id": "identity_37922", "image": "TextVQA/images/afa960687bd2e359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: do one of the signs say \"yay!\"?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 12.1171875, "loss": 3.2839303016662598 }, { "id": "identity_147077", "image": "DocVQA/images/xxmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contract number and date mentioned in the document?" }, { "from": "gpt", "value": "The answer is DA-49-007-MD-1024, 1967" } ], "clip_score": 22.640625, "loss": 3.1080281734466553 }, { "id": "identity_178050", "image": "TabMWP/images/27007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which year saw the greatest improvement in record time from the previous year?" }, { "from": "gpt", "value": "The answer is 2018" } ], "clip_score": 25.640625, "loss": 1.596819281578064 }, { "id": "identity_276980", "image": "PMC-VQA/images/PMC7823325_diagnostics-11-00056-f001_348417.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely reason for the filling defects observed in the veins of the right iliac axis?\nChoices:\n(A) Calcification\n(B) Emboli\n(C) Thrombotic material\n(D) Inflammation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.09375, "loss": 4.597546577453613 }, { "id": "identity_318063", "image": "Geometry3K/images/1248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A triangle has an area of 248 square inches. Find the length of the base.\nChoices:\n(A) 15.5\n(B) 16\n(C) 31\n(D) 32" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.375, "loss": 2.9481213092803955 }, { "id": "identity_57366", "image": "DVQA/images/bar_train_00199856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value for the category labeled \"powder\" greater than 50?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.453125, "loss": 3.6351897716522217 }, { "id": "identity_327157", "image": "VQA2.0/images/209574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time does this clock show?" }, { "from": "gpt", "value": "The answer is 9:40" } ], "clip_score": 17.375, "loss": 1.7580723762512207 }, { "id": "identity_61820", "image": "DocVQA/images/rfhm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the institute that released the paper?" }, { "from": "gpt", "value": "The answer is INSTITUTE OF NUTRITION" } ], "clip_score": 31.734375, "loss": 1.704645037651062 }, { "id": "identity_11165", "image": "GeoQA+/images/1848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知AB∥CD,AD与BC相交于点P,AB=4,CD=7,PD=10,则AP的长等于()\nChoices:\n(A) \\frac{40}{11}\n(B) \\frac{70}{4}\n(C) \\frac{70}{11}\n(D) \\frac{40}{7}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.65625, "loss": 2.6746087074279785 }, { "id": "identity_289282", "image": "A-OKVQA/images/524325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material are the items that people are looking at most likely made of?\nChoices:\n(A) mud\n(B) cloth\n(C) brick\n(D) stone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 3.720703125, "loss": 4.468670845031738 }, { "id": "identity_284069", "image": "PMC-VQA/images/PMC3703402_fig5_216211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the patient's brain is the lesion located?\nChoices:\n(A) Left frontal lobe\n(B) Right frontal lobe\n(C) Occipital lobe\n(D) Temporal lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.650927543640137 }, { "id": "identity_75849", "image": "AI2D/images/3505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the structure labeled as \"Golgi apparatus\"?" }, { "from": "gpt", "value": "The answer is Modifies and packages proteins and lipids" } ], "clip_score": 26.265625, "loss": 1.6506303548812866 }, { "id": "identity_195749", "image": "A-OKVQA/images/205762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of luggage are visible in the image?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 30.015625, "loss": 2.2701802253723145 }, { "id": "identity_296129", "image": "PMC-VQA/images/PMC4269088_fig4_345247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can be observed in the right obturator canal?\nChoices:\n(A) Herniated small bowel loop\n(B) No herniation observed.\n(C) Herniated large bowel loop\n(D) Mesentery herniation." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.09375, "loss": 4.248356342315674 }, { "id": "identity_276307", "image": "GeoQA+/images/4178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that D, E, and F are points on the sides BC, CA, and AB of the isosceles triangle ABC, with AB = AC, BD = 2, CD = 3, CE = 4, AE = 3/2, ∠FDE = ∠B, what is the length of AF?\nChoices:\n(A) 5.5\n(B) 4.5\n(C) 4\n(D) 3.5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.84375, "loss": 2.561763048171997 }, { "id": "identity_260230", "image": "VQA2.0/images/496031.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of food is the boy eating, and what are the visible ingredients in it?" }, { "from": "gpt", "value": "The answer is Hot dog with pickle, pepper, and mustard" } ], "clip_score": 26.15625, "loss": 2.557917594909668 }, { "id": "identity_272927", "image": "GeoQA+/images/868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, a ramp is 130m long, and the distance from the top of the ramp to the horizontal ground is 50m. What is the slope of this ramp? \n(A) 5/12\n(B) 12/13\n(C) 5/13\n(D) 13/12" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.46875, "loss": 3.2166473865509033 }, { "id": "identity_124247", "image": "A-OKVQA/images/48066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing the number 14 on their red shirts?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.234375, "loss": 2.4253089427948 }, { "id": "identity_168673", "image": "IconQA/images/562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the rubber ducks facing the same direction?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.828125, "loss": 3.293487071990967 }, { "id": "identity_120798", "image": "Super-CLEVR/images/superCLEVR_new_000110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 23.890625, "loss": 2.4175522327423096 }, { "id": "identity_244135", "image": "MapQA/images/map_6508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined value range of personnel for the states of Minnesota, North Dakota, and South Dakota?" }, { "from": "gpt", "value": "The answer is 495-570" } ], "clip_score": 33.25, "loss": 2.4531915187835693 }, { "id": "identity_260257", "image": "ScienceQA/images/19021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the physical properties of the material that make this bouncy ball suitable for bouncing, and how do these properties compare to those of clay?" }, { "from": "gpt", "value": "The answer is Elasticity, resilience, and durability make rubber suitable for bouncing" } ], "clip_score": 25.8125, "loss": 2.178577184677124 }, { "id": "identity_208869", "image": "PMC-VQA/images/PMC5930784_Fig1_308809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a pneumothorax in this image?" }, { "from": "gpt", "value": "The answer is No pneumothorax" } ], "clip_score": 31.9375, "loss": 2.5848851203918457 }, { "id": "identity_143744", "image": "DocVQA/images/kzlf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main focus of the Nutrition Committee's responsibilities?" }, { "from": "gpt", "value": "The answer is Nutrition and cardiovascular disease" } ], "clip_score": 20.5, "loss": 2.2149832248687744 }, { "id": "identity_192649", "image": "VQA2.0/images/502140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the straps securing the surfboards?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 28.3125, "loss": 2.7473669052124023 }, { "id": "identity_115016", "image": "PMC-VQA/images/PMC6261832_rcr2392-fig-0002_402074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of pleural effusion in this image?" }, { "from": "gpt", "value": "The answer is No, there is no pleural effusion" } ], "clip_score": 27.078125, "loss": 1.843570590019226 }, { "id": "identity_110700", "image": "VQA2.0/images/532583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any advertisements visible in the background?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.765625, "loss": 3.2097175121307373 }, { "id": "identity_196233", "image": "TextVQA/images/16644d10770757eb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book?" }, { "from": "gpt", "value": "The answer is Nebula Award Stories 3" } ], "clip_score": 18.359375, "loss": 2.032047986984253 }, { "id": "identity_24951", "image": "PMC-VQA/images/PMC7931264_FIG2_383925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of mass was demonstrated in the image? \nChoices:\n(A) Tumor \n(B) Cyst \n(C) Hematoma \n(D) Scar tissue " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.078125, "loss": 4.768654823303223 }, { "id": "identity_232980", "image": "DocVQA/images/nfnw0228_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount due including the price of books and post, packing & insurance?" }, { "from": "gpt", "value": "The answer is £1414.42" } ], "clip_score": 21.5, "loss": 2.533154249191284 }, { "id": "identity_329961", "image": "VizWiz/images/VizWiz_train_00007835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Hi. What kind of tea is this? Thanks." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 19.890625, "loss": 4.107551097869873 }, { "id": "identity_247744", "image": "DocVQA/images/ngxh0227_48.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the National Foundation State Representative for Eastern Missouri, including the name of the representative?" }, { "from": "gpt", "value": "The answer is John F. Heifner, P.O. Box 504, 410 Jefferson Street, Jefferson City, Missouri" } ], "clip_score": 24.265625, "loss": 1.9776054620742798 }, { "id": "identity_312427", "image": "ScienceQA/images/2851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organism belongs to the same genus as the European green toad?\nChoices:\n(A) Hyla cinerea\n(B) Bufo bufo\n(C) Atelopus zeteki" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.671875, "loss": 3.6246414184570312 }, { "id": "identity_176841", "image": "CLEVR-Math/images/CLEVR_train_009573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.265625, "loss": 1.93463933467865 }, { "id": "identity_329353", "image": "ChartQA/images/05705464003735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is represented by the red line?" }, { "from": "gpt", "value": "The answer is Denmark" } ], "clip_score": 16.625, "loss": 2.9124317169189453 }, { "id": "identity_276977", "image": "VQA-AS/images/1682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the item that the woman is collecting?" }, { "from": "gpt", "value": "The answer is toys" } ], "clip_score": 21.359375, "loss": 3.002673864364624 }, { "id": "identity_53456", "image": "DVQA/images/bar_train_00196131.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between the highest and lowest selling items in the \"stake\" store?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.53125, "loss": 2.925359010696411 }, { "id": "identity_242117", "image": "A-OKVQA/images/65063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different gaits this animal can perform, and which one is typically faster than a trot but slower than a gallop?" }, { "from": "gpt", "value": "The answer is canter" } ], "clip_score": 28.140625, "loss": 3.5549747943878174 }, { "id": "identity_77192", "image": "AI2D/images/3718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the first part of the small intestine that receives partially digested food from the stomach" }, { "from": "gpt", "value": "The answer is Duodenum" } ], "clip_score": 31.15625, "loss": 2.011262893676758 }, { "id": "identity_130144", "image": "A-OKVQA/images/130851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person on the bed doing?" }, { "from": "gpt", "value": "The answer is Sleeping or resting" } ], "clip_score": 19.234375, "loss": 2.3857061862945557 }, { "id": "identity_184834", "image": "VQA2.0/images/527059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the parking meter adjacent to the letter 'B'?" }, { "from": "gpt", "value": "The answer is 5344" } ], "clip_score": 22.484375, "loss": 1.8259978294372559 }, { "id": "identity_71391", "image": "AI2D/images/4823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit is represented by the label Pometum?" }, { "from": "gpt", "value": "The answer is Pometum represents a type of multiple fruit" } ], "clip_score": 23.4375, "loss": 2.7483596801757812 }, { "id": "identity_228020", "image": "AI2D/images/2357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the complete life cycle of the plant starting from the seed stage and explain the transitions between each stage" }, { "from": "gpt", "value": "The answer is Seed, Seedling, Adult, Seed" } ], "clip_score": 38.5, "loss": 1.9482331275939941 }, { "id": "identity_241857", "image": "TabMWP/images/1219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is Ron's net pay after taxes for the pay period April 6-12?" }, { "from": "gpt", "value": "The answer is $371.66" } ], "clip_score": 34.59375, "loss": 2.0735137462615967 }, { "id": "identity_114837", "image": "PMC-VQA/images/PMC5216371_f2_110601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the Miller indices on the rings in ascending or descending order from the center?" }, { "from": "gpt", "value": "The answer is Ascending" } ], "clip_score": 23.875, "loss": 2.463163375854492 }, { "id": "identity_101778", "image": "CLEVR-Math/images/CLEVR_train_005575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.46875, "loss": 2.4152872562408447 }, { "id": "identity_140912", "image": "FigureQA/images/98804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Purple on the y-axis?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 33.65625, "loss": 2.588071346282959 }, { "id": "identity_159691", "image": "GeoQA+/images/1789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the side length of square b is doubled, what would be the new area of square b?" }, { "from": "gpt", "value": "The answer is 4(11 + 5 + 2√55) / 4" } ], "clip_score": 30.625, "loss": 2.3974266052246094 }, { "id": "identity_56832", "image": "FigureQA/images/43497.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Brown greater than 70?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.3359375, "loss": 3.592395067214966 }, { "id": "identity_45766", "image": "ChartQA/images/two_col_81120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of views for the bottom three websites combined?" }, { "from": "gpt", "value": "The answer is 304.01 million views" } ], "clip_score": 25.75, "loss": 2.3148882389068604 }, { "id": "identity_192690", "image": "PMC-VQA/images/PMC4902502_F3_40168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any sign of midline shift in this brain MRI?" }, { "from": "gpt", "value": "The answer is No midline shift observed" } ], "clip_score": 34.34375, "loss": 3.204094409942627 }, { "id": "identity_320053", "image": "A-OKVQA/images/368515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is a baby most likely to be in this image?\nChoices:\n(A) stroller\n(B) building\n(C) car\n(D) tree" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.59375, "loss": 4.272150993347168 }, { "id": "identity_161638", "image": "GeoQA+/images/3322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC in the diagram?" }, { "from": "gpt", "value": "The answer is 180 degrees" } ], "clip_score": 33.09375, "loss": 1.7266474962234497 }, { "id": "identity_176738", "image": "IconQA/images/38074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Assuming Joe spent equal time on each activity, what fraction of his total activity time was spent on shopping?" }, { "from": "gpt", "value": "The answer is 1/3" } ], "clip_score": 26.40625, "loss": 1.532370686531067 }, { "id": "identity_150919", "image": "ScienceQA/images/20142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What will be the magnetic interaction between the two magnets?" }, { "from": "gpt", "value": "The answer is Repel" } ], "clip_score": 20.875, "loss": 2.4192819595336914 }, { "id": "identity_46301", "image": "PlotQA/images/90907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the graph?" }, { "from": "gpt", "value": "The answer is Percentage of children ages 12-23 months who received DPT vaccinations" } ], "clip_score": 33.6875, "loss": 0.8965686559677124 }, { "id": "identity_288892", "image": "VQA2.0/images/523262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of individuals currently dressed in pink pants?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 14.1484375, "loss": 2.3670103549957275 }, { "id": "identity_208053", "image": "VQA2.0/images/146389.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes of traffic are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 28.640625, "loss": 4.411191463470459 }, { "id": "identity_11620", "image": "ChartQA/images/27512313006547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the sum of all bars above 0.06kg??" }, { "from": "gpt", "value": "The answer is 0.21" } ], "clip_score": 17.609375, "loss": 2.1348838806152344 }, { "id": "identity_306840", "image": "ChartQA/images/40350636009248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is represented by this bar?" }, { "from": "gpt", "value": "The answer is Egypt" } ], "clip_score": 15.65625, "loss": 3.551997423171997 }, { "id": "identity_217929", "image": "VQA-AS/images/8709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tree trunk?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 17.359375, "loss": 3.0437865257263184 }, { "id": "identity_159240", "image": "GeoQA+/images/1351.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the quadrilateral ABDC?" }, { "from": "gpt", "value": "The answer is 20 km" } ], "clip_score": 31.75, "loss": 2.984466075897217 }, { "id": "identity_44587", "image": "DocVQA/images/zyjf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who wrote the \"White Paper\" mentioned in the document?" }, { "from": "gpt", "value": "The answer is Dr. Cohen" } ], "clip_score": 24.40625, "loss": 2.9170305728912354 }, { "id": "identity_12521", "image": "ScienceQA/images/14042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is coffee a solid or a liquid?\nChoices:\n(A) a solid\n(B) a liquid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.28125, "loss": 3.8692519664764404 }, { "id": "identity_270794", "image": "TextVQA/images/2f4db7d7756d36a1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country's navy is displayed on the back of the plane?" }, { "from": "gpt", "value": "The answer is us" } ], "clip_score": 23.328125, "loss": 2.514716625213623 }, { "id": "identity_117937", "image": "VQA-AS/images/12397.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the two individuals engaged in?" }, { "from": "gpt", "value": "The answer is They are having a picnic and playing" } ], "clip_score": 30.0, "loss": 2.9847075939178467 }, { "id": "identity_247510", "image": "UniGeo/images/1995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图:已知AD∥BE∥CF,且AB=4,BC=5,EF=4,若DE=3.2,求AD的长度。" }, { "from": "gpt", "value": "The answer is 5.76" } ], "clip_score": 30.328125, "loss": 2.0812952518463135 }, { "id": "identity_303903", "image": "TextVQA/images/0279382e07bed7de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of that book?" }, { "from": "gpt", "value": "The answer is james barlow" } ], "clip_score": 17.53125, "loss": 2.9379279613494873 }, { "id": "identity_219609", "image": "VQA2.0/images/231320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clocks are showing a time between 3:00 and 6:00?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.234375, "loss": 2.3011324405670166 }, { "id": "identity_301140", "image": "FigureQA/images/80950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Deep Pink greater than this?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.28125, "loss": 4.321104526519775 }, { "id": "identity_324481", "image": "TextVQA/images/936265ee534bdf7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is on the front glass above this character?" }, { "from": "gpt", "value": "The answer is story" } ], "clip_score": 18.46875, "loss": 3.593048572540283 }, { "id": "identity_97767", "image": "IconQA/images/80083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this shape a polyhedron?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.75, "loss": 3.334442615509033 }, { "id": "identity_15127", "image": "ScienceQA/images/9302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Compare the average kinetic energies of the particles in each sample. Which sample has the higher temperature?\nChoices:\n(A) sample B\n(B) sample A\n(C) neither; the samples have the same temperature" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.625, "loss": 2.9574313163757324 }, { "id": "identity_11951", "image": "UniGeo/images/3313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点A、B、C在⊙O上,∠A=45°,则∠BOC的度数是()\nChoices:\n(A) 30°\n(B) 45°\n(C) 60°\n(D) 90°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.1875, "loss": 2.9016034603118896 }, { "id": "identity_62344", "image": "FigureQA/images/86799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values covered by the Blue line?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 24.75, "loss": 2.557514190673828 }, { "id": "identity_157184", "image": "ScienceQA/images/14565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 26.21875, "loss": 2.89235782623291 }, { "id": "identity_174401", "image": "TabMWP/images/12958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy 1 pound each of sage, oregano, and mint, how much will it cost in total?" }, { "from": "gpt", "value": "The answer is $20" } ], "clip_score": 32.1875, "loss": 2.432168483734131 }, { "id": "identity_155295", "image": "AI2D/images/161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the point in the Moon's orbit when it is closest to the Earth?" }, { "from": "gpt", "value": "The answer is Perigee" } ], "clip_score": 33.625, "loss": 2.4135818481445312 }, { "id": "identity_19990", "image": "TQA/images/question_images/periodic_table_7386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the atomic number of Iron?\nChoices:\n(A) 26\n(B) 16\n(C) 45\n(D) 39" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.34375, "loss": 3.4873733520507812 }, { "id": "identity_20685", "image": "IconQA/images/42103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) probable\n(B) certain\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.984375, "loss": 3.3905463218688965 }, { "id": "identity_137782", "image": "DocVQA/images/phgp0000_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the RJR Account Number provided in the document?" }, { "from": "gpt", "value": "The answer is 675352" } ], "clip_score": 27.515625, "loss": 2.645181179046631 }, { "id": "identity_23512", "image": "TextVQA/images/1c8599611ba24db7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the cafe?" }, { "from": "gpt", "value": "The answer is workshop cafe" } ], "clip_score": 27.921875, "loss": 2.437251091003418 }, { "id": "identity_212337", "image": "VQA2.0/images/81768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of the red bus on the left?" }, { "from": "gpt", "value": "The answer is 189" } ], "clip_score": 27.234375, "loss": 2.329007387161255 }, { "id": "identity_180957", "image": "IconQA/images/60014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of bears to planes?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 16.140625, "loss": 1.6120860576629639 }, { "id": "identity_245260", "image": "GeoQA+/images/901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,在△ABC中,已知点D,E,F分别为边BC,AD,CE的中点,且S△ABC=4cm²,求△DEF的周长。" }, { "from": "gpt", "value": "The answer is (a + b + c) / 2" } ], "clip_score": 25.625, "loss": 1.5523122549057007 }, { "id": "identity_327253", "image": "PMC-VQA/images/PMC6687661_mgg3814-fig-0001_21597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain shows high intensity in this image?\nChoices:\n(A) Amygdala\n(B) Cerebellum\n(C) Centrum semiovale\n(D) Hippocampus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.875, "loss": 3.723139524459839 }, { "id": "identity_202092", "image": "A-OKVQA/images/162792.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible difference in the architecture of the buildings in the two images?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.59375, "loss": 2.6056926250457764 }, { "id": "identity_28842", "image": "PMC-VQA/images/PMC6194051_Fig2_383742.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does the X-ray images change over time after treatment of the bone infarction? \nChoices:\n(A) Decreased product resorption and increased defect-radiolucency \n(B) Increased product resorption and increased defect-radiolucency \n(C) Increased product resorption and persistent reduced defect-radiolucency \n(D) Decreased product resorption and persistent reduced defect-radiolucency " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.203125, "loss": 3.154975414276123 }, { "id": "identity_71529", "image": "ScienceQA/images/12286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what is the likely purpose of using the shovel?" }, { "from": "gpt", "value": "The answer is Transporting or relocating sand" } ], "clip_score": 27.3125, "loss": 3.7813405990600586 }, { "id": "identity_98457", "image": "TabMWP/images/24683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more vacation days did Wayne take in 2014 compared to 2010?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 41.0, "loss": 2.7334957122802734 }, { "id": "identity_6110", "image": "A-OKVQA/images/445019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is near the ball?\nChoices:\n(A) toddler\n(B) baby\n(C) batter\n(D) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 9.0546875, "loss": 4.233142852783203 }, { "id": "identity_184556", "image": "VQA2.0/images/24923.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many railway buffer stops are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 21.359375, "loss": 4.350690841674805 }, { "id": "identity_173157", "image": "TabMWP/images/33922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Jake buys 3 piccolos, how much money will he spend?" }, { "from": "gpt", "value": "The answer is $1389" } ], "clip_score": 25.171875, "loss": 2.6698832511901855 }, { "id": "identity_58128", "image": "DocVQA/images/gqyk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sum of patents in Israel and Taiwan more than the patents in Great Britain?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.015625, "loss": 2.98054838180542 }, { "id": "identity_56217", "image": "DocVQA/images/kgcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the incorrect date mentioned in the previous memorandum?" }, { "from": "gpt", "value": "The answer is Friday, November 28" } ], "clip_score": 20.546875, "loss": 2.443978786468506 }, { "id": "identity_215198", "image": "A-OKVQA/images/457011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the people engaged in?" }, { "from": "gpt", "value": "The answer is Skijoring (skiing behind a horse)" } ], "clip_score": 32.59375, "loss": 2.3599860668182373 }, { "id": "identity_168058", "image": "IconQA/images/90695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Ian's walk takes 1 hour and 15 minutes, what time will it be when he finishes his walk?" }, { "from": "gpt", "value": "The answer is 5:15" } ], "clip_score": 21.609375, "loss": 1.505602478981018 }, { "id": "identity_219530", "image": "TextVQA/images/69cad96f7d481a9a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the street address for Emerson's Campus?" }, { "from": "gpt", "value": "The answer is 145-155 Tremont Street" } ], "clip_score": 27.875, "loss": 1.9745097160339355 }, { "id": "identity_324114", "image": "PMC-VQA/images/PMC5292996_fig4_122972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the next diagnostic scan after the initial CT scan of the chest?\nChoices:\n(A) MRI of the chest\n(B) X-ray of the chest\n(C) PET scan of the chest\n(D) There is no follow-up diagnostic scan." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.046875, "loss": 3.682492733001709 }, { "id": "identity_146977", "image": "ChartQA/images/10324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between \"More democratic\" and \"Less democratic\" for July 2015?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 27.515625, "loss": 2.1133322715759277 }, { "id": "identity_158671", "image": "TQA/images/question_images/earth_moon_phases_1237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two phases of the moon are symmetrical to each other in terms of illumination?" }, { "from": "gpt", "value": "The answer is First Quarter and Last Quarter" } ], "clip_score": 23.171875, "loss": 1.7318984270095825 }, { "id": "identity_176158", "image": "TabMWP/images/29864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who has the largest sticker collection?" }, { "from": "gpt", "value": "The answer is Anthony" } ], "clip_score": 31.8125, "loss": 2.8396100997924805 }, { "id": "identity_103054", "image": "CLEVR-Math/images/CLEVR_train_016086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are smaller than the green cube?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.046875, "loss": 2.508195161819458 }, { "id": "identity_93603", "image": "TabMWP/images/25666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the women's relay event?" }, { "from": "gpt", "value": "The answer is 40 minutes" } ], "clip_score": 26.390625, "loss": 2.596470355987549 }, { "id": "identity_142790", "image": "ChartQA/images/17063491001655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the Birth Rate to the Death Rate in China for the year 1980?" }, { "from": "gpt", "value": "The answer is 2.89" } ], "clip_score": 37.0625, "loss": 2.009563684463501 }, { "id": "identity_3899", "image": "Geometry3K/images/320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y.\nChoices:\n(A) 4\n(B) 4 \\sqrt { 3 }\n(C) 8\n(D) 8 \\sqrt { 3 }" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.6875, "loss": 2.5012590885162354 }, { "id": "identity_315670", "image": "VQA2.0/images/429769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this vehicle?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 22.0625, "loss": 3.11804461479187 }, { "id": "identity_199942", "image": "PMC-VQA/images/PMC4195344_fig2_327335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of pulmonary edema in this image?" }, { "from": "gpt", "value": "The answer is No signs of pulmonary edema" } ], "clip_score": 31.796875, "loss": 2.417268991470337 }, { "id": "identity_326878", "image": "DocVQA/images/phgg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the chairman for the afternoon?" }, { "from": "gpt", "value": "The answer is Doctor Robert Shank" } ], "clip_score": 20.5625, "loss": 3.767307758331299 }, { "id": "identity_166969", "image": "TabMWP/images/3028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median weight of the children?" }, { "from": "gpt", "value": "The answer is 36 pounds" } ], "clip_score": 32.90625, "loss": 2.644670009613037 }, { "id": "identity_309998", "image": "DocVQA/images/xylk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is this document dated?" }, { "from": "gpt", "value": "The answer is december 19, 1977" } ], "clip_score": 28.859375, "loss": 1.7540959119796753 }, { "id": "identity_283687", "image": "UniGeo/images/695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In right triangle ACB, where angle C is 90°, angle A is 37°, and AC is 4, approximately how long is BC? \n(A) 2.4\n(B) 3.0\n(C) 3.2\n(D) 5.0" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.578125, "loss": 2.1565184593200684 }, { "id": "identity_266422", "image": "AI2D/images/4718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which leaf shape resembles a circle or sphere in appearance?\nChoices:\n(A) rounded\n(B) cordate narrow\n(C) ovate\n(D) broad elliptic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.421875, "loss": 4.030313968658447 }, { "id": "identity_261061", "image": "PlotQA/images/72312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average personal remittances received by the government employees of St. Lucia over the years 2009 to 2013?" }, { "from": "gpt", "value": "The answer is 30,146,932.5925926" } ], "clip_score": 33.4375, "loss": 2.6222784519195557 }, { "id": "identity_183809", "image": "A-OKVQA/images/518816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of roof do the buildings have?" }, { "from": "gpt", "value": "The answer is Thatched roof" } ], "clip_score": 21.5625, "loss": 2.721137523651123 }, { "id": "identity_106453", "image": "VQA2.0/images/576780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the crates stacked on the left side of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.796875, "loss": 3.512110948562622 }, { "id": "identity_202360", "image": "A-OKVQA/images/116694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building where the people are lined up?" }, { "from": "gpt", "value": "The answer is Within ten words" } ], "clip_score": 23.390625, "loss": 5.431960582733154 }, { "id": "identity_158346", "image": "AI2D/images/4611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shortest stage in the life cycle of a mealworm?" }, { "from": "gpt", "value": "The answer is Eggs and Small Mealworm" } ], "clip_score": 36.03125, "loss": 1.7272648811340332 }, { "id": "identity_276062", "image": "IconQA/images/47155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of stars?\nChoices:\n(A) 3\n(B) 1\n(C) 2\n(D) 5\n(E) 4" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 25.90625, "loss": 3.5624661445617676 }, { "id": "identity_297387", "image": "GeoQA+/images/4706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, CA is a tangent to circle O at point A, and point B lies on circle O. If ∠CAB = 55°, what is the measure of ∠AOB?\nChoices:\n(A) 120°\n(B) 110°\n(C) 90°\n(D) 55°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.4375, "loss": 2.5986924171447754 }, { "id": "identity_97499", "image": "IconQA/images/14677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of bears to trains in the image?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 28.609375, "loss": 1.6922882795333862 }, { "id": "identity_176675", "image": "IconQA/images/11636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many triangles are blue?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 32.96875, "loss": 2.1915297508239746 }, { "id": "identity_247084", "image": "Geometry3K/images/1337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In \\(\\odot X\\), given \\(AB = 30\\), \\(CD = 30\\), and \\(m \\widehat{CZ} = 40^\\circ\\), find the length of \\(MN\\)" }, { "from": "gpt", "value": "The answer is \\(MN = 15\\)" } ], "clip_score": 26.375, "loss": 1.5247513055801392 }, { "id": "identity_17196", "image": "VQA2.0/images/44160.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What powers this engine?" }, { "from": "gpt", "value": "The answer is fuel" } ], "clip_score": 11.8046875, "loss": 3.3587403297424316 }, { "id": "identity_87247", "image": "UniGeo/images/1733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point D is reached by continuing in the same direction as from A to B, what is the measure of angle ABD?" }, { "from": "gpt", "value": "The answer is 150°" } ], "clip_score": 31.71875, "loss": 2.222167491912842 }, { "id": "identity_327297", "image": "Super-CLEVR/images/superCLEVR_new_001319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the small bus that is right of a tiny metallic double bus behind a small red matte bus on the right side of a large gray object?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 27.015625, "loss": 4.02497673034668 }, { "id": "identity_227404", "image": "DocVQA/images/gzvx0078_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the rights granted to SME regarding signage and advertising at the Event Site, and what conditions apply to these rights?" }, { "from": "gpt", "value": "The answer is Rights to use, erect, select location, and first refusal" } ], "clip_score": 24.3125, "loss": 4.076648712158203 }, { "id": "identity_39503", "image": "DVQA/images/bar_train_00183104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset has the lowest accuracy value for the 'brief' algorithm?" }, { "from": "gpt", "value": "The answer is beard" } ], "clip_score": 33.8125, "loss": 3.117462396621704 }, { "id": "identity_151341", "image": "ScienceQA/images/4530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state located west of Indiana?" }, { "from": "gpt", "value": "The answer is Springfield, Illinois" } ], "clip_score": 23.625, "loss": 2.250481367111206 }, { "id": "identity_90693", "image": "UniGeo/images/64.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the midpoints of the sides of quadrilateral ABCD are E, F, G, and H, and the length of side AB is 12cm, what is the length of side EF?" }, { "from": "gpt", "value": "The answer is 6cm" } ], "clip_score": 31.375, "loss": 1.9234044551849365 }, { "id": "identity_295266", "image": "TabMWP/images/36091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During Ted's camping trip, he recorded the number of miles he hiked each day. On which day did Ted hike the greatest number of miles? \n(A) Monday\n(B) Tuesday\n(C) Wednesday\n(D) Thursday" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 38.96875, "loss": 3.325758457183838 }, { "id": "identity_296709", "image": "DocVQA/images/syvl0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which laboratories are mentioned on the given page?" }, { "from": "gpt", "value": "The answer is Truesdail Laboratories, INC." } ], "clip_score": 25.09375, "loss": 1.9952223300933838 }, { "id": "identity_322309", "image": "ScienceQA/images/4713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What describes the ecosystem of the Cape Breton Highlands National Park?\nChoices:\n(A) It has soil that is frozen year-round.\n(B) It has soil that is rich in nutrients.\n(C) It has long, cold winters and short, cool summers." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.03125, "loss": 3.6952311992645264 }, { "id": "identity_11783", "image": "TextVQA/images/351a2d5749219403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: does this meeting look interesting?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 23.09375, "loss": 4.617661476135254 }, { "id": "identity_40687", "image": "DVQA/images/bar_train_00003326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined value of birth and car in soap?" }, { "from": "gpt", "value": "The answer is 120" } ], "clip_score": 19.453125, "loss": 2.315605640411377 }, { "id": "identity_133718", "image": "A-OKVQA/images/306420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fruit at the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.78125, "loss": 3.8142433166503906 }, { "id": "identity_271765", "image": "FigureQA/images/64058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Royal Blue or Light Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.703125, "loss": 3.8994452953338623 }, { "id": "identity_298936", "image": "VizWiz/images/VizWiz_train_00009872.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please provide the model number as this page is not compatible with voice over?" }, { "from": "gpt", "value": "The answer is a1429" } ], "clip_score": 16.5, "loss": 2.489238977432251 }, { "id": "identity_57761", "image": "DVQA/images/bar_train_00053332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'meal' algorithm across all datasets?" }, { "from": "gpt", "value": "The answer is 6.25" } ], "clip_score": 35.09375, "loss": 2.0367748737335205 }, { "id": "identity_147962", "image": "DocVQA/images/kzcw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time the fax was sent?" }, { "from": "gpt", "value": "The answer is 1:20 PM EST" } ], "clip_score": 21.6875, "loss": 1.9864422082901 }, { "id": "identity_320466", "image": "DocVQA/images/ghcx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is this addressed?" }, { "from": "gpt", "value": "The answer is Dr. Ei Soon Cho" } ], "clip_score": 19.203125, "loss": 3.1405961513519287 }, { "id": "identity_188145", "image": "A-OKVQA/images/408528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of musical instrument is being played in the background?" }, { "from": "gpt", "value": "The answer is Saxophone" } ], "clip_score": 16.203125, "loss": 2.051264524459839 }, { "id": "identity_170708", "image": "TabMWP/images/3751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of entries between the year with the most entries and the year with the least entries?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.5625, "loss": 2.797056198120117 }, { "id": "identity_317109", "image": "FigureQA/images/93031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the value for Sienna the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.109375, "loss": 4.141354560852051 }, { "id": "identity_49459", "image": "ChartQA/images/50253951001731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are represented in this graph?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.6875, "loss": 2.7616498470306396 }, { "id": "identity_208891", "image": "PMC-VQA/images/PMC2981854_F0002_78516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiodensity of the lesion indicated by the arrow on the right side of the image?" }, { "from": "gpt", "value": "The answer is Hyperdense" } ], "clip_score": 30.671875, "loss": 3.407853364944458 }, { "id": "identity_154819", "image": "ScienceQA/images/16625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sea located to the north of the highlighted country?" }, { "from": "gpt", "value": "The answer is Gulf of Gonâve" } ], "clip_score": 27.5, "loss": 2.468313694000244 }, { "id": "identity_331636", "image": "PlotQA/images/119053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many instances is the count of bars for a specific year different from the count of legend labels?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 28.28125, "loss": 2.4050674438476562 }, { "id": "identity_337998", "image": "Super-CLEVR/images/superCLEVR_new_001821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer large airplanes than gray rubber fighters?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.90625, "loss": 4.999881744384766 }, { "id": "identity_312732", "image": "AI2D/images/4297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the given image, which layer is positioned above the upper mantle?\nChoices:\n(A) oceanic crust\n(B) continental crust\n(C) plastic asthenosphere\n(D) lithosphere" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.375, "loss": 4.625496864318848 }, { "id": "identity_185149", "image": "A-OKVQA/images/305206.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sporting event is depicted in this image?" }, { "from": "gpt", "value": "The answer is Baseball" } ], "clip_score": 26.71875, "loss": 3.948636293411255 }, { "id": "identity_229491", "image": "VizWiz/images/VizWiz_train_00008960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the positions of each letter in the alphabet for the code in the picture?" }, { "from": "gpt", "value": "The answer is 85" } ], "clip_score": 21.28125, "loss": 2.2639458179473877 }, { "id": "identity_64570", "image": "ScienceQA/images/13913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the map?" }, { "from": "gpt", "value": "The answer is Virginia" } ], "clip_score": 32.03125, "loss": 3.0740509033203125 }, { "id": "identity_323068", "image": "TabMWP/images/4768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Joseph need to buy a photograph signed by a famous TV star and a photograph signed by a famous film star?" }, { "from": "gpt", "value": "The answer is 118" } ], "clip_score": 22.9375, "loss": 2.7005867958068848 }, { "id": "identity_51756", "image": "FigureQA/images/83994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the maximum value of Lawn Green greater than the minimum value of Rosy Brown?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 36.96875, "loss": 3.497926712036133 }, { "id": "identity_279354", "image": "VizWiz/images/VizWiz_train_00006838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is park shelter" } ], "clip_score": 28.125, "loss": 3.7678110599517822 }, { "id": "identity_178573", "image": "TabMWP/images/851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of times customers rented a movie?" }, { "from": "gpt", "value": "The answer is 2.4" } ], "clip_score": 28.0625, "loss": 1.983577013015747 }, { "id": "identity_170650", "image": "TabMWP/images/14032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of bull's-eyes hit by the archer?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 30.078125, "loss": 2.8214199542999268 }, { "id": "identity_11733", "image": "PMC-VQA/images/PMC509297_pbio-0020225-g003_250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which hippocampal sections are displayed in the images? \nChoices:\n(A) Superior, medial and inferior. \n(B) Anterior, middle and posterior. \n(C) Lateral, medial and inferior. \n(D) Posterior, medial and anterior. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.21875, "loss": 4.059098720550537 }, { "id": "identity_127782", "image": "A-OKVQA/images/406848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the beverage in the glass next to the pizza?" }, { "from": "gpt", "value": "The answer is Beer" } ], "clip_score": 26.40625, "loss": 2.8189961910247803 }, { "id": "identity_32190", "image": "PMC-VQA/images/PMC5187572_brainsci-06-00058-f005_108181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The MRI coordinates for the center of the left STN are displayed inside the _____.\nChoices:\n(A) blue rectangle \n(B) red square \n(C) green triangle \n(D) black diamond" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.359375, "loss": 3.862999677658081 }, { "id": "identity_136740", "image": "DVQA/images/bar_train_00139710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold for 'phone' across all stores?" }, { "from": "gpt", "value": "The answer is 2.67 units" } ], "clip_score": 30.3125, "loss": 1.9286531209945679 }, { "id": "identity_276721", "image": "ScienceQA/images/16955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being pointed out in the highlighted area? Is it:\n(A) The Bahamas\n(B) Cuba\n(C) Trinidad and Tobago\n(D) Jamaica" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.265625, "loss": 4.116032123565674 }, { "id": "identity_135013", "image": "DocVQA/images/llfg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the zip code for Lea & Febiger?" }, { "from": "gpt", "value": "The answer is 19106" } ], "clip_score": 20.109375, "loss": 1.6453750133514404 }, { "id": "identity_263778", "image": "TabMWP/images/36946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what time does the train arrive at the restaurant district according to the schedule provided?\nChoices:\n(A) 1:30 P.M.\n(B) 11:00 A.M.\n(C) 12:55 P.M.\n(D) 9:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.515625, "loss": 4.107501983642578 }, { "id": "identity_6992", "image": "PlotQA/images/139358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it the case that in every year, the sum of the gross intake ratio in Azerbaijan and gross intake ratio in El Salvador is greater than the gross intake ratio in Hungary ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.40625, "loss": 2.717031955718994 }, { "id": "identity_322243", "image": "PMC-VQA/images/PMC4100447_fig1_306525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What echocardiography view was used to obtain this image?\nChoices:\n(A) Apical four chamber view \n(B) Subcostal long axis view \n(C) Parasternal short axis view \n(D) Suprasternal view" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.421875, "loss": 3.650409698486328 }, { "id": "identity_270422", "image": "DVQA/images/bar_train_00012460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of humor in pride?" }, { "from": "gpt", "value": "The answer is 1000" } ], "clip_score": 22.53125, "loss": 3.0768277645111084 }, { "id": "identity_225569", "image": "DVQA/images/bar_train_00074278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the objects crop and moment in the category wall, and how does this compare to the total number of people who prefer the objects circle and damage in the category coffee?" }, { "from": "gpt", "value": "The answer is 15 people prefer crop and moment in wall, which is more than 9 people who prefer circle and damage in coffee" } ], "clip_score": 36.5, "loss": 1.3897719383239746 }, { "id": "identity_124061", "image": "TextVQA/images/2b9618e6ba8d7421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing visible in the image?" }, { "from": "gpt", "value": "The answer is No pedestrian crossing visible" } ], "clip_score": 25.46875, "loss": 3.07476544380188 }, { "id": "identity_30436", "image": "A-OKVQA/images/544325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of setting is the gloved man standing in?\nChoices:\n(A) baseball field\n(B) ice rink\n(C) basketball court\n(D) soccer field" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.46875, "loss": 4.399920463562012 }, { "id": "identity_17876", "image": "IconQA/images/104955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many eggs are there?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 27.15625, "loss": 2.029611110687256 }, { "id": "identity_269751", "image": "UniGeo/images/2338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the triangle ABC, with DE parallel to BC, AD = 1, AB = 3, and DE = 2, what is the length of BC?\nChoices:\n(A) 2\n(B) 4\n(C) 6\n(D) 8" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.9375, "loss": 2.5623984336853027 }, { "id": "identity_226882", "image": "GeoQA+/images/1862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,BC=3,点D为AC延长线上的一点,AC=3CD,过点D作DH∥AB,交BC的延长线于点H,若∠CBD=∠A,求△ABD的面积。" }, { "from": "gpt", "value": "The answer is \\( \\frac{27}{8} \\times \\sin(\\angle A) \\)" } ], "clip_score": 31.53125, "loss": 2.3873887062072754 }, { "id": "identity_285370", "image": "GeoQA+/images/3037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, points E and F are the midpoints of BD and CE, respectively. If the area of triangle AEF is 3, what is the area of triangle ABC?\nChoices:\n(A) 9\n(B) 10\n(C) 11\n(D) 12" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.65625, "loss": 2.158684253692627 }, { "id": "identity_2345", "image": "ChartQA/images/two_col_100097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the other technology other than All?" }, { "from": "gpt", "value": "The answer is Financial technology (fintech)" } ], "clip_score": 31.671875, "loss": 1.5156699419021606 }, { "id": "identity_62769", "image": "ScienceQA/images/14223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture likely to be of the object labeled 'feather'?" }, { "from": "gpt", "value": "The answer is Soft and lightweight" } ], "clip_score": 31.59375, "loss": 2.556709051132202 }, { "id": "identity_293099", "image": "PMC-VQA/images/PMC6182756_Fig1_381276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the representation of the photo shown in the upper section?\nChoices:\n(A) A male F2 \n(B) Ovaries of a female \n(C) A female F2 \n(D) Testes of a male" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.03125, "loss": 4.100093841552734 }, { "id": "identity_262731", "image": "UniGeo/images/2473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure shown, PA and PB are tangents to circle O at points A and B, with angle P measuring 70°. What is the measure of angle ACB?\nChoices:\n(A) 15°\n(B) 40°\n(C) 75°\n(D) 55°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.46875, "loss": 2.374615430831909 }, { "id": "identity_88824", "image": "Geometry3K/images/1840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the interior angles of the parallelogram WXYZ?" }, { "from": "gpt", "value": "The answer is 360°" } ], "clip_score": 34.8125, "loss": 1.4083415269851685 }, { "id": "identity_291720", "image": "ScienceQA/images/9725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city on the map is indicated by the marking? \nChoices:\n(A) Memphis\n(B) Miami\n(C) Oklahoma City\n(D) San Antonio" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.71875, "loss": 3.581500291824341 }, { "id": "identity_288527", "image": "VQA-AS/images/11872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two items are being held by both children?" }, { "from": "gpt", "value": "The answer is toys" } ], "clip_score": 22.375, "loss": 2.7425034046173096 }, { "id": "identity_92409", "image": "IconQA/images/76847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of black marbles to purple marbles?" }, { "from": "gpt", "value": "The answer is 3:2" } ], "clip_score": 37.25, "loss": 1.575114369392395 }, { "id": "identity_227521", "image": "TabMWP/images/35994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between 1965 and 1975, which country experienced the largest decrease in per-capita aid received from the U.S.?" }, { "from": "gpt", "value": "The answer is Libya" } ], "clip_score": 37.5, "loss": 2.3880884647369385 }, { "id": "identity_44985", "image": "FigureQA/images/74246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Blue ever surpass Medium Aqua?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.09375, "loss": 3.139002561569214 }, { "id": "identity_123665", "image": "PMC-VQA/images/PMC8870907_diagnostics-12-00384-f006_203201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique was used to create this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 28.53125, "loss": 2.688706636428833 }, { "id": "identity_75155", "image": "TQA/images/question_images/human_system_eye_6146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"cornea\"?" }, { "from": "gpt", "value": "The answer is It focuses light onto the retina for clear vision" } ], "clip_score": 30.96875, "loss": 2.451608180999756 }, { "id": "identity_114336", "image": "VQA-AS/images/2529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the individuals engaged in?" }, { "from": "gpt", "value": "The answer is Sitting around a campfire" } ], "clip_score": 31.03125, "loss": 2.6645920276641846 }, { "id": "identity_10496", "image": "GeoQA+/images/5538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 一副直角三角板如图放置(∠F=∠ACB=90°,∠E=45°,∠A=60°),如果点C在FD的延长线上,点B在DE上,且AB∥CF,则∠DBC的度数为()\nChoices:\n(A) 10°\n(B) 15°\n(C) 18°\n(D) 30°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.59375, "loss": 2.7539241313934326 }, { "id": "identity_97670", "image": "IconQA/images/71909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sides does this shape have?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.609375, "loss": 2.860276222229004 }, { "id": "identity_246928", "image": "A-OKVQA/images/181836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three visual elements in the image that indicate the street has been cordoned off in celebration of LGBT rights, and explain how each element supports this conclusion" }, { "from": "gpt", "value": "The answer is Rainbow flag, body paint, crowd atmosphere" } ], "clip_score": 30.96875, "loss": 3.781543254852295 }, { "id": "identity_65429", "image": "ScienceQA/images/8274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 25.609375, "loss": 2.890946388244629 }, { "id": "identity_240434", "image": "IconQA/images/27218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sequence of transformations (turn, flip, slide) has been applied to the letter 'P' to obtain the letter 'd'?" }, { "from": "gpt", "value": "The answer is Turn 90 degrees clockwise, then flip horizontally" } ], "clip_score": 25.859375, "loss": 1.7053136825561523 }, { "id": "identity_174300", "image": "TabMWP/images/12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings have an odd number of floors?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 23.203125, "loss": 2.6838769912719727 }, { "id": "identity_161851", "image": "GeoQA+/images/1487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ADE?" }, { "from": "gpt", "value": "The answer is ADE = 90 degrees" } ], "clip_score": 31.375, "loss": 1.6933566331863403 }, { "id": "identity_16304", "image": "VizWiz/images/VizWiz_train_00005901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's this?" }, { "from": "gpt", "value": "The answer is bouquet roses" } ], "clip_score": 26.265625, "loss": 3.0262513160705566 }, { "id": "identity_56083", "image": "DocVQA/images/zxmw0023_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the R.J.R Region number for this agreement?" }, { "from": "gpt", "value": "The answer is 5600" } ], "clip_score": 21.796875, "loss": 2.6572370529174805 }, { "id": "identity_134609", "image": "DVQA/images/bar_train_00066571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value in the chart?" }, { "from": "gpt", "value": "The answer is tour" } ], "clip_score": 31.375, "loss": 3.4738693237304688 }, { "id": "identity_95125", "image": "IconQA/images/31783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are there in total?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.640625, "loss": 2.0213990211486816 }, { "id": "identity_71965", "image": "TQA/images/question_images/human_system_nervous_6180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the brain is responsible for processing visual information?" }, { "from": "gpt", "value": "The answer is Occipital lobe" } ], "clip_score": 32.46875, "loss": 1.46017587184906 }, { "id": "identity_216395", "image": "A-OKVQA/images/153692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light for the direction the bicyclist is facing?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 28.890625, "loss": 3.014065980911255 }, { "id": "identity_167298", "image": "TabMWP/images/32701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the y-values in the table?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.09375, "loss": 2.826404333114624 }, { "id": "identity_2224", "image": "VQA2.0/images/291717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bikes secure?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.078125, "loss": 2.577255964279175 }, { "id": "identity_32808", "image": "AI2D/images/4110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If there are no more trees what will happen to air?\nChoices:\n(A) Nothing will happen\n(B) No oxygen for air\n(C) Not sure\n(D) Air will be good" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.796875, "loss": 4.451494216918945 }, { "id": "identity_85390", "image": "UniGeo/images/3319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if OD is perpendicular to BC and angle BOD is 60 degrees, what is the measure of angle BOC?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 35.96875, "loss": 1.4460359811782837 }, { "id": "identity_232402", "image": "A-OKVQA/images/380348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item in the image is designed to provide comfort while allowing a gentle rocking motion, and is typically found in a living room or nursery?" }, { "from": "gpt", "value": "The answer is chair" } ], "clip_score": 14.28125, "loss": 3.4690933227539062 }, { "id": "identity_212324", "image": "VQA2.0/images/454712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hot dogs are visible on the roller grill?" }, { "from": "gpt", "value": "The answer is Eleven" } ], "clip_score": 31.375, "loss": 3.50711727142334 }, { "id": "identity_137409", "image": "DocVQA/images/jlbv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first action listed on the routing slip?" }, { "from": "gpt", "value": "The answer is APPROVAL" } ], "clip_score": 18.6875, "loss": 3.426676034927368 }, { "id": "identity_231422", "image": "TQA/images/question_images/human_system_circulatory_6065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chambers does the heart have, and what are their names?" }, { "from": "gpt", "value": "The answer is Four chambers: right atrium, left atrium, right ventricle, left ventricle" } ], "clip_score": 31.265625, "loss": 1.2116528749465942 }, { "id": "identity_167419", "image": "TabMWP/images/10454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the store continues to sell balloons at the same rate as Thursday for the next 3 days, how many balloons will be sold in those 3 days?" }, { "from": "gpt", "value": "The answer is 699 balloons" } ], "clip_score": 38.1875, "loss": 2.0446114540100098 }, { "id": "identity_17823", "image": "TabMWP/images/9995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A soccer coach asked the members of his team how many times they played soccer last week. How many people played soccer fewer than 2 times?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 36.65625, "loss": 2.2011218070983887 }, { "id": "identity_213054", "image": "A-OKVQA/images/329139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the surface of the road like?" }, { "from": "gpt", "value": "The answer is Wet or recently rained on" } ], "clip_score": 15.8671875, "loss": 3.2111997604370117 }, { "id": "identity_244289", "image": "PlotQA/images/120923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of the population having access to electricity in 1990, 2000, and 2010 for all the countries combined?" }, { "from": "gpt", "value": "The answer is 220, 240, 260" } ], "clip_score": 34.59375, "loss": 1.850766897201538 }, { "id": "identity_176463", "image": "IconQA/images/23739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is spun 100 times, how many times is it expected to land on cyan?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 26.296875, "loss": 2.2274413108825684 }, { "id": "identity_220711", "image": "TextVQA/images/eaec8e244807ffbf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the jersey worn by the player with the number 18?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.71875, "loss": 4.129286289215088 }, { "id": "identity_142953", "image": "DVQA/images/bar_train_00111743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the category labeled \"talk\"?" }, { "from": "gpt", "value": "The answer is 0 to 6" } ], "clip_score": 23.578125, "loss": 2.055525541305542 }, { "id": "identity_91970", "image": "IconQA/images/77766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you group the diamonds into pairs, how many pairs can you form?" }, { "from": "gpt", "value": "The answer is 2 pairs with 1 diamond left over" } ], "clip_score": 31.828125, "loss": 2.0892832279205322 }, { "id": "identity_178934", "image": "TabMWP/images/2661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students sold an even number of boxes?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 24.96875, "loss": 2.678598642349243 }, { "id": "identity_27226", "image": "CLEVR-Math/images/CLEVR_train_062953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all large gray blocks. Subtract all yellow matte things. How many objects are left?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 29.765625, "loss": 2.302846908569336 }, { "id": "identity_332309", "image": "GeoQA+/images/4129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O的半径为5,弦AB=8,点C是AB的中点,连接OC,则OC的长是多少?\n选项:\n(A) 1\n(B) 2\n(C) 3\n(D) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.9375, "loss": 2.412595748901367 }, { "id": "identity_319222", "image": "TextVQA/images/7820d140f82150ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which motor company is shown?" }, { "from": "gpt", "value": "The answer is toyota" } ], "clip_score": 20.109375, "loss": 2.223708391189575 }, { "id": "identity_29865", "image": "TextVQA/images/480359c55a3e9686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the current batters name?" }, { "from": "gpt", "value": "The answer is jerry hairston" } ], "clip_score": 23.9375, "loss": 1.80863356590271 }, { "id": "identity_331074", "image": "ScienceQA/images/1524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the place shown?\nChoices:\n(A) Michigan\n(B) Massachusetts\n(C) Rhode Island\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.21875, "loss": 4.7696919441223145 }, { "id": "identity_117490", "image": "PMC-VQA/images/PMC7729070_F1_318361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence is shown in the image?" }, { "from": "gpt", "value": "The answer is T1-weighted MRI sequence" } ], "clip_score": 31.734375, "loss": 1.6660118103027344 }, { "id": "identity_192198", "image": "PMC-VQA/images/PMC2966750_F0001_77672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any medical devices or implants be seen in this image?" }, { "from": "gpt", "value": "The answer is No, there are no medical devices or implants visible" } ], "clip_score": 32.4375, "loss": 1.6466033458709717 }, { "id": "identity_120990", "image": "Super-CLEVR/images/superCLEVR_new_000576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly behind the purple car?" }, { "from": "gpt", "value": "The answer is A pickup truck" } ], "clip_score": 31.953125, "loss": 2.450413703918457 }, { "id": "identity_221990", "image": "VQA2.0/images/174615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of year does the scene likely depict?" }, { "from": "gpt", "value": "The answer is Winter" } ], "clip_score": 19.25, "loss": 3.609722137451172 }, { "id": "identity_1925", "image": "TabMWP/images/35970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Sofia's classmates revealed how many science articles they read. What is the mean of the numbers?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 37.5625, "loss": 2.5934603214263916 }, { "id": "identity_71069", "image": "ScienceQA/images/20598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which molecule has the highest number of atoms?" }, { "from": "gpt", "value": "The answer is ethane" } ], "clip_score": 29.375, "loss": 2.408327341079712 }, { "id": "identity_319446", "image": "AI2D/images/2688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During which season does the south pole receive no sunlight?\nChoices:\n(A) fall\n(B) winter\n(C) summer\n(D) spring" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.296875, "loss": 4.930913925170898 }, { "id": "identity_43599", "image": "MapQA/images/map_8314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest number of officers?" }, { "from": "gpt", "value": "The answer is A state in the range 5121086-5216923" } ], "clip_score": 34.0, "loss": 2.719181776046753 }, { "id": "identity_170983", "image": "TabMWP/images/8488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of the most expensive and the least expensive stands combined?" }, { "from": "gpt", "value": "The answer is $14,145.00" } ], "clip_score": 29.875, "loss": 1.8210633993148804 }, { "id": "identity_260624", "image": "Geometry3K/images/557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus LMPQ, if m∠QLM = 2x^2 - 10, m∠QPM = 8x, and MP = 10, find the value of x and then determine the measure of angle LQM" }, { "from": "gpt", "value": "The answer is approximately 70 degrees" } ], "clip_score": 36.625, "loss": 4.036100387573242 }, { "id": "identity_162142", "image": "GeoQA+/images/1716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle 1 is 10 degrees, what is the sum of the measures of angle 1, angle 2, angle α, and angle 3?" }, { "from": "gpt", "value": "The answer is 360 degrees" } ], "clip_score": 28.015625, "loss": 2.2017574310302734 }, { "id": "identity_215177", "image": "A-OKVQA/images/315037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man's backpack placed next to?" }, { "from": "gpt", "value": "The answer is Laptop" } ], "clip_score": 25.640625, "loss": 4.023682117462158 }, { "id": "identity_221491", "image": "VQA2.0/images/447108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation can be seen in the image?" }, { "from": "gpt", "value": "The answer is Grass, a broad-leaved tree, and shrubs or bushes" } ], "clip_score": 18.09375, "loss": 2.739912509918213 }, { "id": "identity_260366", "image": "MapQA/images/map_2801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of puppets in the states that have a value range of 250228-258745 and 259121-274499?" }, { "from": "gpt", "value": "The answer is 3127779" } ], "clip_score": 28.03125, "loss": 2.511115789413452 }, { "id": "identity_151973", "image": "ScienceQA/images/1458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is not used in the construction of any of the objects shown?" }, { "from": "gpt", "value": "The answer is Metal" } ], "clip_score": 24.75, "loss": 3.421729326248169 }, { "id": "identity_213080", "image": "A-OKVQA/images/183327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of construction vehicle is depicted by the toy on the mound of dirt?" }, { "from": "gpt", "value": "The answer is Toy bulldozer" } ], "clip_score": 22.546875, "loss": 2.505382537841797 }, { "id": "identity_95170", "image": "CLEVR-Math/images/CLEVR_train_009426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cylinders?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.171875, "loss": 2.495950937271118 }, { "id": "identity_156762", "image": "TQA/images/question_images/simple_machines_8197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the resistance force when the wheel is turned as indicated?" }, { "from": "gpt", "value": "The answer is Upwards" } ], "clip_score": 29.8125, "loss": 3.892817258834839 }, { "id": "identity_213115", "image": "A-OKVQA/images/3220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the player's jersey to the left of the mascot with the large head?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 18.40625, "loss": 2.3872387409210205 }, { "id": "identity_227488", "image": "Super-CLEVR/images/superCLEVR_new_000763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are the same color as the shiny bicycle?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 30.90625, "loss": 3.9862208366394043 }, { "id": "identity_100027", "image": "IconQA/images/29540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of squares occupied by the top horizontal line of the letter on the left and the letter on the right?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.765625, "loss": 2.1831657886505127 }, { "id": "identity_97831", "image": "IconQA/images/86593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the minute hand moves counter-clockwise, how many minutes will it take to reach the 9?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 27.78125, "loss": 1.9645278453826904 }, { "id": "identity_137258", "image": "DVQA/images/bar_train_00118185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is preferred by the least number of people?" }, { "from": "gpt", "value": "The answer is thick" } ], "clip_score": 26.859375, "loss": 3.8946025371551514 }, { "id": "identity_193307", "image": "A-OKVQA/images/300753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main activity happening in the image?" }, { "from": "gpt", "value": "The answer is A baseball game is being played" } ], "clip_score": 27.015625, "loss": 3.0773935317993164 }, { "id": "identity_310491", "image": "ScienceQA/images/20281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these objects attract or repel each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.0, "loss": 3.520108461380005 }, { "id": "identity_268448", "image": "Super-CLEVR/images/superCLEVR_new_000338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a big yellow motorbike to the right of the chopper that is behind the green bicycle?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.96875, "loss": 5.497771263122559 }, { "id": "identity_100782", "image": "IconQA/images/43782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many straight edges does this shape have?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.71875, "loss": 2.1956913471221924 }, { "id": "identity_282165", "image": "PMC-VQA/images/PMC8582216_Fig4_108229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which patient exhibited the most significant improvement in their radiographs after the operation?\nChoices:\n(A) Patient 1\n(B) Patient 4\n(C) Patient 2\n(D) Patient 8" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.9375, "loss": 3.4743475914001465 }, { "id": "identity_102052", "image": "IconQA/images/89783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times larger is the white area compared to the pink area?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.40625, "loss": 2.13714599609375 }, { "id": "identity_7631", "image": "UniGeo/images/1915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,∠C=90°,∠B=20°,以A为圆心,任意长为半径画弧分别交AB于点M和N,再分别以M、N为圆心,大于MN的长为半径画弧,两弧交于P,连接AP并延长交于点D,则∠ADB的度数为()\nChoices:\n(A) 75°\n(B) 105°\n(C) 110°\n(D) 125°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.4375, "loss": 2.410195827484131 }, { "id": "identity_191631", "image": "A-OKVQA/images/412247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Early morning or late afternoon" } ], "clip_score": 20.234375, "loss": 3.363783121109009 }, { "id": "identity_172491", "image": "TabMWP/images/25635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of science articles read by all of Farid's classmates combined?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 36.875, "loss": 2.091909885406494 }, { "id": "identity_170825", "image": "IconQA/images/35589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is spun 100 times, how many times can we expect it to land on pink?" }, { "from": "gpt", "value": "The answer is less than 25" } ], "clip_score": 31.59375, "loss": 2.8375463485717773 }, { "id": "identity_328199", "image": "IconQA/images/79009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.40625, "loss": 4.090943336486816 }, { "id": "identity_185979", "image": "PMC-VQA/images/PMC7467987_Figure1_247139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a mass or tumor in this lung image?" }, { "from": "gpt", "value": "The answer is No evidence of mass or tumor" } ], "clip_score": 30.171875, "loss": 2.8252487182617188 }, { "id": "identity_214615", "image": "PMC-VQA/images/PMC4683868_Fig7_454414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is predominantly visible in this image?" }, { "from": "gpt", "value": "The answer is Lymphoid tissue" } ], "clip_score": 22.390625, "loss": 2.719315528869629 }, { "id": "identity_169770", "image": "IconQA/images/95755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the spinner is colored pink?" }, { "from": "gpt", "value": "The answer is 1/4" } ], "clip_score": 33.46875, "loss": 1.4515972137451172 }, { "id": "identity_155449", "image": "TQA/images/question_images/photosynthesis_4097.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is being depicted in this image?" }, { "from": "gpt", "value": "The answer is Photosynthesis" } ], "clip_score": 30.515625, "loss": 2.1421761512756348 }, { "id": "identity_207116", "image": "A-OKVQA/images/543551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicles can be seen in the water?" }, { "from": "gpt", "value": "The answer is Boats and yachts" } ], "clip_score": 22.453125, "loss": 2.647210121154785 }, { "id": "identity_257319", "image": "A-OKVQA/images/123535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sheep are in the blue trailer, and what types are they?" }, { "from": "gpt", "value": "The answer is Three sheep: one ewe, two unidentified" } ], "clip_score": 29.890625, "loss": 3.710095167160034 }, { "id": "identity_190814", "image": "VQA2.0/images/21212.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pathway is shown in the image?" }, { "from": "gpt", "value": "The answer is Paved pathway" } ], "clip_score": 18.875, "loss": 3.0790457725524902 }, { "id": "identity_329987", "image": "TextVQA/images/14860b06ba90af45.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this?" }, { "from": "gpt", "value": "The answer is breecia" } ], "clip_score": 16.921875, "loss": 5.611793041229248 }, { "id": "identity_196306", "image": "PMC-VQA/images/PMC8319602_F1_22092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure can be seen in the center of the image, surrounded by white?" }, { "from": "gpt", "value": "The answer is Spinal canal" } ], "clip_score": 27.8125, "loss": 4.222468852996826 }, { "id": "identity_15231", "image": "AI2D/images/3113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is C called?\nChoices:\n(A) Petiole\n(B) Axil\n(C) Bud\n(D) Apex" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.40625, "loss": 3.8258628845214844 }, { "id": "identity_175687", "image": "IconQA/images/107178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the total squares is purple?" }, { "from": "gpt", "value": "The answer is 5/16" } ], "clip_score": 33.15625, "loss": 1.739276647567749 }, { "id": "identity_234947", "image": "ChartQA/images/multi_col_40659.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the differences between the 2015 and 2016 values for each category?" }, { "from": "gpt", "value": "The answer is 24%" } ], "clip_score": 24.765625, "loss": 2.1629528999328613 }, { "id": "identity_137207", "image": "MapQA/images/map_9120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range of donations in Texas?" }, { "from": "gpt", "value": "The answer is 66.2%-66.9%" } ], "clip_score": 34.53125, "loss": 1.8093152046203613 }, { "id": "identity_196135", "image": "TextVQA/images/3f8259701fe53234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the clock's hands?" }, { "from": "gpt", "value": "The answer is Arrow-shaped" } ], "clip_score": 19.921875, "loss": 2.632197856903076 }, { "id": "identity_71782", "image": "AI2D/images/3096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the fluid that fills the interior of the cell?" }, { "from": "gpt", "value": "The answer is Cytoplasm" } ], "clip_score": 32.75, "loss": 2.0258095264434814 }, { "id": "identity_118359", "image": "TextVQA/images/3f4a8ac091ec3868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number provided for ordering or calling Mindcraft?" }, { "from": "gpt", "value": "The answer is (800) 525-4933" } ], "clip_score": 20.984375, "loss": 1.748156189918518 }, { "id": "identity_74982", "image": "TQA/images/question_images/seasons_2518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What date is the Winter Solstice marked on the image?" }, { "from": "gpt", "value": "The answer is December 22" } ], "clip_score": 33.75, "loss": 1.7778468132019043 }, { "id": "identity_72746", "image": "ScienceQA/images/3605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the shoe's toe?" }, { "from": "gpt", "value": "The answer is Rounded toe" } ], "clip_score": 26.46875, "loss": 3.841911792755127 }, { "id": "identity_319498", "image": "DVQA/images/bar_train_00190604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this object preferred by more people than drunk?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 30.78125, "loss": 3.7097039222717285 }, { "id": "identity_41568", "image": "FigureQA/images/73442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Medium Turquoise?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 23.640625, "loss": 3.198098659515381 }, { "id": "identity_174667", "image": "IconQA/images/53715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dog pictures are there?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.890625, "loss": 2.2177693843841553 }, { "id": "identity_41912", "image": "FigureQA/images/62257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate y-axis value of the Olive line at x-axis value 50?" }, { "from": "gpt", "value": "The answer is 66" } ], "clip_score": 29.375, "loss": 2.9994006156921387 }, { "id": "identity_201785", "image": "PMC-VQA/images/PMC5908617_F1_304248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the large dark space in the upper right quadrant of the image?" }, { "from": "gpt", "value": "The answer is Maxillary sinus" } ], "clip_score": 29.359375, "loss": 2.512225866317749 }, { "id": "identity_262321", "image": "VQA2.0/images/361375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the man?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 14.3125, "loss": 3.610426664352417 }, { "id": "identity_121359", "image": "Super-CLEVR/images/superCLEVR_new_000849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that has a human-like shape?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.15625, "loss": 3.145094394683838 }, { "id": "identity_262059", "image": "VQA-AS/images/13203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the color of the couch?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 25.859375, "loss": 2.917055368423462 }, { "id": "identity_173727", "image": "IconQA/images/81447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the degree measure of the angle for the yellow section of the spinner?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 30.171875, "loss": 1.7982542514801025 }, { "id": "identity_126865", "image": "A-OKVQA/images/196944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people in the image are wearing hats?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.578125, "loss": 4.710864067077637 }, { "id": "identity_291200", "image": "TQA/images/question_images/earth_poles_8059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the degree measurement of the Arctic Circle's latitude in the north?\nChoices:\n(A) 55.5\n(B) 90\n(C) 66.5\n(D) 25.5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.53125, "loss": 4.658146381378174 }, { "id": "identity_187266", "image": "PMC-VQA/images/PMC7933329_Fig1_384362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique is shown in the image?" }, { "from": "gpt", "value": "The answer is Angiogram" } ], "clip_score": 31.21875, "loss": 2.861194610595703 }, { "id": "identity_120298", "image": "A-OKVQA/images/260627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible power lines are above the train?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.65625, "loss": 2.3774187564849854 }, { "id": "identity_192272", "image": "PMC-VQA/images/PMC7211214_Fig1_165914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of brain imaging technique is used in this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 36.28125, "loss": 2.708442449569702 }, { "id": "identity_252534", "image": "ChartQA/images/6802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and lowest positive values of the yellow bars?" }, { "from": "gpt", "value": "The answer is 31" } ], "clip_score": 16.734375, "loss": 2.7776215076446533 }, { "id": "identity_3427", "image": "DVQA/images/bar_train_00162714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the least number of units summed across all the stores?" }, { "from": "gpt", "value": "The answer is stairs" } ], "clip_score": 32.1875, "loss": 2.7730190753936768 }, { "id": "identity_283880", "image": "TabMWP/images/17920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the revenue data collected by a food industry researcher, which pizzeria generated more income from supreme pizzas, Taste of New York or Pizza Italia?\nChoices:\n(A) Taste of New York\n(B) Pizza Italia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.5, "loss": 3.234048366546631 }, { "id": "identity_281388", "image": "DVQA/images/bar_train_00027731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among all the datasets, which algorithm has the lowest total accuracy?" }, { "from": "gpt", "value": "The answer is input" } ], "clip_score": 32.125, "loss": 4.267904758453369 }, { "id": "identity_233792", "image": "A-OKVQA/images/247909.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total price if someone buys one of each type of stuffed animal on display?" }, { "from": "gpt", "value": "The answer is $93" } ], "clip_score": 27.25, "loss": 2.8286936283111572 }, { "id": "identity_271751", "image": "DVQA/images/bar_train_00170098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 19.484375, "loss": 3.762505292892456 }, { "id": "identity_333348", "image": "PMC-VQA/images/PMC5728979_F1_252867.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images were utilized in this study?\nChoices:\n(A) Axial T1-weighted images\n(B) Coronal T2-weighted images\n(C) Sagittal proton density images\n(D) Transaxial diffusion-weighted images" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.28125, "loss": 3.8104491233825684 }, { "id": "identity_181665", "image": "CLEVR-Math/images/CLEVR_train_020034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cubes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.671875, "loss": 2.5375797748565674 }, { "id": "identity_242524", "image": "GeoQA+/images/63.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A,B,C,D四个点均在⊙O上,AO∥DC,∠AOD=20°,求∠ABC的度数。" }, { "from": "gpt", "value": "The answer is 140°" } ], "clip_score": 34.125, "loss": 1.8163161277770996 }, { "id": "identity_235691", "image": "A-OKVQA/images/318820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different types of produce visible in the background, and how many different types can you identify?" }, { "from": "gpt", "value": "The answer is Four types of produce" } ], "clip_score": 26.546875, "loss": 4.8218512535095215 }, { "id": "identity_254464", "image": "VQA2.0/images/212501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many traffic lights are there on the traffic light tree, and what is the average number of lights per branch if the tree has 5 branches?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 34.0, "loss": 3.25394606590271 }, { "id": "identity_33653", "image": "IconQA/images/76312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are there?\nChoices:\n(A) 3\n(B) 2\n(C) 1\n(D) 5\n(E) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.609375, "loss": 3.4070770740509033 }, { "id": "identity_229695", "image": "TabMWP/images/8112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each table has 2 plates, how many plates are on 5 tables?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 36.46875, "loss": 1.7529242038726807 }, { "id": "identity_330882", "image": "FigureQA/images/12172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Light Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.796875, "loss": 3.793485164642334 }, { "id": "identity_286222", "image": "PMC-VQA/images/PMC6097448_Fig1_355565.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which enhancement pattern is observed in the CT images of the neck? \nOptions:\n(A) peripheral enhancement pattern \n(B) central enhancement pattern \n(C) rim enhancement pattern \n(D) non-enhanced pattern" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.046875, "loss": 4.065057277679443 }, { "id": "identity_276486", "image": "TQA/images/abc_question_images/earth_eclipses_11656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the umbra from the following choices:\n(A) W\n(B) R\n(C) U\n(D) F" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.109375, "loss": 3.940852403640747 }, { "id": "identity_8877", "image": "ScienceQA/images/13305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) New Jersey\n(B) Ohio\n(C) Connecticut\n(D) Delaware" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.0625, "loss": 4.51146125793457 }, { "id": "identity_104266", "image": "VizWiz/images/VizWiz_train_00016435.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the border at the bottom of the cup?" }, { "from": "gpt", "value": "The answer is A multicolored geometric pattern" } ], "clip_score": 20.375, "loss": 3.212844133377075 }, { "id": "identity_295938", "image": "UniGeo/images/1366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, triangle ABC has side AC intersecting with circle O at points C and D, passing through the center O. Side AB is tangent to circle O at point B, with angle A measuring 30°. What is the measure of angle C? \n(A) 40°\n(B) 30°\n(C) 60°\n(D) 45°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.40625, "loss": 2.567011594772339 }, { "id": "identity_213856", "image": "A-OKVQA/images/263104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people waiting at the bus stop?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.046875, "loss": 2.6624789237976074 }, { "id": "identity_318229", "image": "VQA2.0/images/246880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this picture look like it's decorated for something special?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 26.015625, "loss": 3.0526890754699707 }, { "id": "identity_210301", "image": "PMC-VQA/images/PMC6514081_Fig3_478496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process might be occurring in the area labeled with the letter 'ch'?" }, { "from": "gpt", "value": "The answer is Transcription or replication" } ], "clip_score": 21.828125, "loss": 3.8281238079071045 }, { "id": "identity_54336", "image": "FigureQA/images/87241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Medium Orchid at x-axis label 20?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 26.265625, "loss": 2.634125232696533 }, { "id": "identity_151477", "image": "ScienceQA/images/15624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 32.28125, "loss": 4.374170780181885 }, { "id": "identity_318295", "image": "IconQA/images/29671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many of these shapes are there?\nChoices:\n(A) 8\n(B) 7\n(C) 2\n(D) 5\n(E) 6" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 20.671875, "loss": 3.163666248321533 }, { "id": "identity_216639", "image": "PMC-VQA/images/PMC4020501_fig2_288340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the cystic mass in comparison to the bladder?" }, { "from": "gpt", "value": "The answer is Slightly smaller than the bladder" } ], "clip_score": 30.140625, "loss": 2.2216601371765137 }, { "id": "identity_325143", "image": "TabMWP/images/36017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A caterer at a birthday party counted the juice cups on the table. The cups had different flavored juices and different shaped straws. What is the probability that a randomly chosen cup has a heart-shaped straw and contains orange juice? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 3/20" } ], "clip_score": 24.71875, "loss": 1.7242234945297241 }, { "id": "identity_215274", "image": "VQA2.0/images/372121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many whole pizzas are visible on the table?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.953125, "loss": 2.431640863418579 }, { "id": "identity_337851", "image": "PlotQA/images/152396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum total debt across these years?" }, { "from": "gpt", "value": "The answer is 2386277000" } ], "clip_score": 29.15625, "loss": 2.338831663131714 }, { "id": "identity_286832", "image": "VQA2.0/images/254743.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the water?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 16.078125, "loss": 3.9596056938171387 }, { "id": "identity_122900", "image": "VQA2.0/images/61832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many straps are visible on the blue bag?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.75, "loss": 4.768014430999756 }, { "id": "identity_200003", "image": "TextVQA/images/a7bc28d28e8efae4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of parking is advertised on the building sign?" }, { "from": "gpt", "value": "The answer is Self Park" } ], "clip_score": 23.046875, "loss": 3.7049458026885986 }, { "id": "identity_11159", "image": "A-OKVQA/images/473427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number written on top of the middle green bananas?\nChoices:\n(A) 500\n(B) 400\n(C) 300\n(D) 600" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.40625, "loss": 3.713533401489258 }, { "id": "identity_233475", "image": "AI2D/images/305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the cougar had more competition over elk, what would most likely happen to the population of primary consumers and producers?" }, { "from": "gpt", "value": "The answer is Primary consumers decrease, producers increase" } ], "clip_score": 26.90625, "loss": 2.916851758956909 }, { "id": "identity_166035", "image": "IconQA/images/92038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides for all the shapes combined?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 27.375, "loss": 1.8115869760513306 }, { "id": "identity_227990", "image": "A-OKVQA/images/116004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators in the image that suggest the event is a parade and not a marathon, mass protest, or fire sale?" }, { "from": "gpt", "value": "The answer is parade passing" } ], "clip_score": 25.375, "loss": 5.602042198181152 }, { "id": "identity_165854", "image": "Geometry3K/images/95.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle CAB?" }, { "from": "gpt", "value": "The answer is 85°" } ], "clip_score": 32.15625, "loss": 2.4518802165985107 }, { "id": "identity_154311", "image": "TQA/images/question_images/em_spectrum_9087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of ray on the electromagnetic spectrum has the lowest energy as per the image?" }, { "from": "gpt", "value": "The answer is radio" } ], "clip_score": 30.3125, "loss": 3.129934072494507 }, { "id": "identity_190139", "image": "PMC-VQA/images/PMC4861721_F5_29095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue circles are drawn on the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.9375, "loss": 2.2778918743133545 }, { "id": "identity_314509", "image": "FigureQA/images/78480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Khaki?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5390625, "loss": 4.230250835418701 }, { "id": "identity_66855", "image": "ScienceQA/images/7558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do the letters 'N' and 'S' represent on the magnets?" }, { "from": "gpt", "value": "The answer is North and South poles" } ], "clip_score": 26.5625, "loss": 2.080815076828003 }, { "id": "identity_165743", "image": "GeoQA+/images/5017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AD is the diameter of circle O and ∠C = 63°, what is the measure of ∠CAB?" }, { "from": "gpt", "value": "The answer is 27°" } ], "clip_score": 36.15625, "loss": 2.1929092407226562 }, { "id": "identity_128495", "image": "A-OKVQA/images/366173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the man riding?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 28.953125, "loss": 3.134934186935425 }, { "id": "identity_75190", "image": "AI2D/images/501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of food for the smaller fish?" }, { "from": "gpt", "value": "The answer is Shrimp and worms" } ], "clip_score": 27.703125, "loss": 2.0954625606536865 }, { "id": "identity_5013", "image": "ChartQA/images/18051975000655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two countries are being compared in the given graph?" }, { "from": "gpt", "value": "The answer is [Israel, Slovenia]" } ], "clip_score": 12.671875, "loss": 2.5493881702423096 }, { "id": "identity_307620", "image": "DocVQA/images/qjwh0078_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the 'Estimated Volume' mentioned?" }, { "from": "gpt", "value": "The answer is 44,200" } ], "clip_score": 16.21875, "loss": 2.139195442199707 }, { "id": "identity_338517", "image": "VQA-AS/images/13049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of the flowers are yellow?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 16.84375, "loss": 2.575615644454956 }, { "id": "identity_245873", "image": "AI2D/images/74.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the complete cycle of carbon from the atmosphere to fossil fuels and back to the atmosphere, including the role of photosynthesis" }, { "from": "gpt", "value": "The answer is Photosynthesis, respiration, decomposition, fossilization, combustion" } ], "clip_score": 34.15625, "loss": 1.7294398546218872 }, { "id": "identity_21159", "image": "DocVQA/images/jzgj0223_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value given under \"Total liabilities\"?" }, { "from": "gpt", "value": "The answer is 307.6" } ], "clip_score": 19.125, "loss": 2.4163029193878174 }, { "id": "identity_13250", "image": "AI2D/images/2475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: WHICH STAGE APPEARS IN WINTER?\nChoices:\n(A) QUEEN FOR AGING\n(B) NEST MAKING\n(C) QUEEN HIBERNATES\n(D) QUEEN ANDMALES" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.234375, "loss": 4.60360050201416 }, { "id": "identity_31745", "image": "PlotQA/images/147112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it the case that in every year, the sum of the percentage of male workers in agriculture and percentage of male workers in industry is greater than the percentage of male workers in services ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.59375, "loss": 3.095656156539917 }, { "id": "identity_295829", "image": "A-OKVQA/images/85225.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is the least likely to be seen in this setting?\nChoices:\n(A) tiger\n(B) dog\n(C) horse\n(D) cat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 1.62109375, "loss": 4.246155738830566 }, { "id": "identity_315464", "image": "IconQA/images/84295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Dave is getting ready in the morning. The clock in his room shows a time. What time is displayed?\nChoices:\n(A) 9:00 P.M.\n(B) 9:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.25, "loss": 3.9076716899871826 }, { "id": "identity_77970", "image": "AI2D/images/2515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the intensity of sunlight affect the temperature on Earth's surface?" }, { "from": "gpt", "value": "The answer is Direct sunlight warms the ground more" } ], "clip_score": 26.359375, "loss": 3.7729570865631104 }, { "id": "identity_157869", "image": "AI2D/images/2970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as Dorsal Root Ganglion?" }, { "from": "gpt", "value": "The answer is It transmits sensory information to the spinal cord" } ], "clip_score": 33.3125, "loss": 1.9978803396224976 }, { "id": "identity_312277", "image": "GeoQA+/images/4286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在菱形ABCD中,AC=8,BD=6,则该菱形的边长为多少?\n选项:\n(A) 4\n(B) 5\n(C) 6\n(D) 10" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.703125, "loss": 2.5818114280700684 }, { "id": "identity_284583", "image": "IconQA/images/21262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the line in centimeters when measured to the nearest centimeter using the ruler?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.125, "loss": 2.9465172290802 }, { "id": "identity_150296", "image": "ScienceQA/images/10870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the denomination of the coin depicted in the image?" }, { "from": "gpt", "value": "The answer is One cent" } ], "clip_score": 30.0, "loss": 3.8788204193115234 }, { "id": "identity_317329", "image": "DVQA/images/bar_train_00002385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm foam in this dataset?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 36.21875, "loss": 3.1932151317596436 }, { "id": "identity_107237", "image": "PMC-VQA/images/PMC5072663_FIG1_86034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image grayscale or colored?" }, { "from": "gpt", "value": "The answer is Grayscale" } ], "clip_score": 23.21875, "loss": 2.9266369342803955 }, { "id": "identity_335788", "image": "A-OKVQA/images/218155.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What weather is the person with the bike most ready for today?\n\nChoices:\n(A) tornado\n(B) rain\n(C) earthquake\n(D) tsunami" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.734375, "loss": 4.945623874664307 }, { "id": "identity_244006", "image": "TQA/images/question_images/human_system_digestive_1393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the pathway food takes from the mouth to the rectum, naming each organ it passes through in order" }, { "from": "gpt", "value": "The answer is Mouth, esophagus, stomach, small intestine, large intestine, rectum" } ], "clip_score": 34.71875, "loss": 0.8396069407463074 }, { "id": "identity_124511", "image": "Super-CLEVR/images/superCLEVR_new_000159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are there in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.15625, "loss": 4.616656303405762 }, { "id": "identity_261611", "image": "UniGeo/images/3353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, ⊙O is the circumcircle of △ACD, AB is the diameter of ⊙O, and ∠BAD is 50°. What is the measure of angle ∠C? \nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.875, "loss": 2.470453977584839 }, { "id": "identity_172206", "image": "IconQA/images/101795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of paw images to house images?" }, { "from": "gpt", "value": "The answer is 3:4" } ], "clip_score": 28.25, "loss": 1.5957517623901367 }, { "id": "identity_51235", "image": "DocVQA/images/hpwn0020_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum number of stores targeted for distribution?" }, { "from": "gpt", "value": "The answer is 1,000 stores" } ], "clip_score": 14.5859375, "loss": 1.412548542022705 }, { "id": "identity_216905", "image": "TextVQA/images/4d01ee8dd964bfa2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any part of the module that appears to be open?" }, { "from": "gpt", "value": "The answer is Yes, a hatch is open" } ], "clip_score": 14.1484375, "loss": 2.8335344791412354 }, { "id": "identity_259273", "image": "ChartQA/images/4877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of U.S. concern over China's economic strength from 2012 to 2018?" }, { "from": "gpt", "value": "The answer is 54.25" } ], "clip_score": 35.9375, "loss": 2.325834274291992 }, { "id": "identity_111531", "image": "TextVQA/images/aec1f662f074dd76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are seated at the table?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.1875, "loss": 4.074941158294678 }, { "id": "identity_112634", "image": "VQA-AS/images/8683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What season does the image likely represent?" }, { "from": "gpt", "value": "The answer is Autumn or fall" } ], "clip_score": 21.65625, "loss": 3.4818854331970215 }, { "id": "identity_139746", "image": "DocVQA/images/qskd0228_20.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To which committee was the bill referred?" }, { "from": "gpt", "value": "The answer is Committee on Interstate and Foreign Commerce" } ], "clip_score": 31.703125, "loss": 1.821766972541809 }, { "id": "identity_287840", "image": "TabMWP/images/33260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magazines counted by Aaliyah, a journalism student, had a minimum of 30 pages?" }, { "from": "gpt", "value": "The answer is 21" } ], "clip_score": 26.203125, "loss": 2.239248752593994 }, { "id": "identity_284865", "image": "TabMWP/images/29098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Brazil win more medals, 1996 or 2000, when comparing their performance at the Olympic games?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.46875, "loss": 3.8437366485595703 }, { "id": "identity_269413", "image": "TabMWP/images/318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money did Savannah spend at the store when she purchased 5 pounds of pepper and 2 pounds of paprika?" }, { "from": "gpt", "value": "The answer is 52.78" } ], "clip_score": 25.734375, "loss": 2.4738402366638184 }, { "id": "identity_181417", "image": "CLEVR-Math/images/CLEVR_train_022506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of cubes to spheres?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 22.375, "loss": 1.5242974758148193 }, { "id": "identity_253614", "image": "IconQA/images/36612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the green section if the total area of the shape is 12 square units?" }, { "from": "gpt", "value": "The answer is 4 square units" } ], "clip_score": 31.65625, "loss": 1.5561963319778442 }, { "id": "identity_256223", "image": "DocVQA/images/tyvn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the product for which the Material Safety Data Sheet (MSDS) was requested, and what is the total number of pages in the fax including the cover page?" }, { "from": "gpt", "value": "The answer is IGEPAL CO-897, 12 pages" } ], "clip_score": 33.5, "loss": 2.6162521839141846 }, { "id": "identity_129823", "image": "PMC-VQA/images/PMC7214677_F5_166766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of screws labeled in the image?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 19.359375, "loss": 1.8454347848892212 }, { "id": "identity_111405", "image": "VQA-RAD/images/synpic38263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which hemisphere of the brain is the abnormality located?" }, { "from": "gpt", "value": "The answer is Left hemisphere" } ], "clip_score": 28.484375, "loss": 2.3205084800720215 }, { "id": "identity_226544", "image": "AI2D/images/4044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain why the pressure is greatest deep in the Sun and describe the relationship between pressure, gravity, and temperature in the Sun's core" }, { "from": "gpt", "value": "The answer is Pressure is greatest deep in the Sun due to overlying weight" } ], "clip_score": 36.34375, "loss": 2.5880048274993896 }, { "id": "identity_276273", "image": "IconQA/images/79625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of fish present?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 19.046875, "loss": 2.187709093093872 }, { "id": "identity_272778", "image": "VizWiz/images/VizWiz_train_00018727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are you able to input the code displayed on the screen?" }, { "from": "gpt", "value": "The answer is slide to unlock" } ], "clip_score": 18.09375, "loss": 2.642832040786743 }, { "id": "identity_127425", "image": "Super-CLEVR/images/superCLEVR_new_000729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple objects are there in the image?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.390625, "loss": 2.3326070308685303 }, { "id": "identity_186584", "image": "VizWiz/images/VizWiz_train_00013006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the spray bottle full or empty?" }, { "from": "gpt", "value": "The answer is Empty" } ], "clip_score": 26.765625, "loss": 2.7661685943603516 }, { "id": "identity_165181", "image": "GeoQA+/images/3543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that line m is parallel to line a and line n is parallel to line c, what is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 180 degrees" } ], "clip_score": 31.734375, "loss": 1.8285863399505615 }, { "id": "identity_130295", "image": "TextVQA/images/833011003958eb5e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the lanyard worn by the individual in the center?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 16.890625, "loss": 3.352478504180908 }, { "id": "identity_263852", "image": "UniGeo/images/1339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, PA, PB, and AB are all tangent to circle O, with angle P equal to 60°. What is the measure of angle AOB?\nChoices:\n(A) 50°\n(B) 60°\n(C) 70°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.84375, "loss": 2.323655605316162 }, { "id": "identity_53506", "image": "FigureQA/images/59489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pie chart represents 100 units, how many units does the Turquoise section represent?" }, { "from": "gpt", "value": "The answer is 25 units" } ], "clip_score": 35.15625, "loss": 1.9779002666473389 }, { "id": "identity_237884", "image": "FigureQA/images/63026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest average value across all x-axis points?" }, { "from": "gpt", "value": "The answer is Lawn Green" } ], "clip_score": 31.3125, "loss": 3.568596124649048 }, { "id": "identity_239507", "image": "VQA2.0/images/486075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How can you tell that these flowers have been cut and arranged in a vase?" }, { "from": "gpt", "value": "The answer is Flowers are in a vase with cut stems arranged" } ], "clip_score": 26.6875, "loss": 2.5609302520751953 }, { "id": "identity_278297", "image": "PMC-VQA/images/PMC6991147_FIG2_108171.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the provided CT images, where are the two Valiant type endoprostheses positioned? \nOptions:\n(A) Positioned on the left above the celiac trunk and on the right below the left subclavian artery.\n(B) Positioned on the left above the celiac trunk and on the right above the right subclavian artery.\n(C) Positioned on the left below the celiac trunk and on the right above the right subclavian artery.\n(D) Positioned on the left below the celiac trunk and on the right above the left subclavian artery." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.0625, "loss": 3.383039951324463 }, { "id": "identity_20594", "image": "VQA-AS/images/18474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a dog?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.96875, "loss": 3.921619415283203 }, { "id": "identity_238468", "image": "AI2D/images/3314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the pathway of a protein from synthesis to secretion in a cell, mentioning all the organelles involved" }, { "from": "gpt", "value": "The answer is Ribosome, Rough ER, Golgi apparatus, secretory vesicles, cell membrane" } ], "clip_score": 30.65625, "loss": 1.5447571277618408 }, { "id": "identity_39748", "image": "DVQA/images/bar_train_00017100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'flux' dataset across all groups?" }, { "from": "gpt", "value": "The answer is 6.33" } ], "clip_score": 30.09375, "loss": 2.1949524879455566 }, { "id": "identity_125761", "image": "VQA2.0/images/276597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Evening" } ], "clip_score": 17.03125, "loss": 3.011014699935913 }, { "id": "identity_14602", "image": "A-OKVQA/images/493227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who manufactured the car on the right?\nChoices:\n(A) lexus\n(B) audi\n(C) bmw\n(D) mercedes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.890625, "loss": 4.261050701141357 }, { "id": "identity_117961", "image": "PMC-VQA/images/PMC5482258_f04_181823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image labeled with any patient information or identifiers?" }, { "from": "gpt", "value": "The answer is Yes, labeled with \"C\" and a number" } ], "clip_score": 24.46875, "loss": 2.482407808303833 }, { "id": "identity_115245", "image": "TextVQA/images/070d47f646990950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the winery that produced this wine?" }, { "from": "gpt", "value": "The answer is Barrique" } ], "clip_score": 22.46875, "loss": 5.066381931304932 }, { "id": "identity_53922", "image": "DVQA/images/bar_train_00119444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented in the 'mass' group?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.984375, "loss": 2.2445592880249023 }, { "id": "identity_150516", "image": "TQA/images/question_images/chemical_bonding_covalent_9059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bond is formed between the oxygen and hydrogen atoms?" }, { "from": "gpt", "value": "The answer is Covalent bond" } ], "clip_score": 35.15625, "loss": 1.8445167541503906 }, { "id": "identity_221996", "image": "PMC-VQA/images/PMC6232946_rcr2385-fig-0001_395611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of pleural effusion in this image?" }, { "from": "gpt", "value": "The answer is No evidence of pleural effusion" } ], "clip_score": 30.875, "loss": 2.602861166000366 }, { "id": "identity_22067", "image": "PMC-VQA/images/PMC4770723_F5_6788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many images were taken during the FISH analysis? \nChoices:\n(A) Three \n(B) Two \n(C) Four \n(D) One " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.03125, "loss": 3.1865227222442627 }, { "id": "identity_316844", "image": "UniGeo/images/3290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,BD是⊙O的直径,弦AC垂直于BD,垂足为E,∠AOB=60°,则∠BDC等于()\n选择:\n(A) 30°\n(B) 45°\n(C) 60°\n(D) 90°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.078125, "loss": 2.8489551544189453 }, { "id": "identity_281762", "image": "DocVQA/images/zjpc0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who holds the position of Adviser at the Zambia Council for the Blind?" }, { "from": "gpt", "value": "The answer is David L. Carter" } ], "clip_score": 22.265625, "loss": 3.225851058959961 }, { "id": "identity_131959", "image": "TextVQA/images/b6133bc44a3bab04.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 23.171875, "loss": 2.7238285541534424 }, { "id": "identity_28346", "image": "A-OKVQA/images/479213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How old is the birthday girl?\nChoices:\n(A) 30\n(B) ten\n(C) 20\n(D) 40" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.671875, "loss": 3.125886917114258 }, { "id": "identity_255653", "image": "GeoQA+/images/3713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线l1∥l2,将含30°角的直角三角板按如图方式放置,直角顶点在l2上,若∠1=76°,则∠2与∠3的和是多少?" }, { "from": "gpt", "value": "The answer is 164°" } ], "clip_score": 26.328125, "loss": 1.8217703104019165 }, { "id": "identity_168205", "image": "TabMWP/images/30795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more medals did Italy win in 1980 compared to 1976?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.125, "loss": 2.44963002204895 }, { "id": "identity_9758", "image": "Super-CLEVR/images/superCLEVR_new_001342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many other objects are there of the same size as the gray metallic bicycle?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.453125, "loss": 2.5137436389923096 }, { "id": "identity_41588", "image": "PlotQA/images/55653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average net enrolment rate of males in secondary schools in Ecuador from 1999 to 2004?" }, { "from": "gpt", "value": "The answer is Approximately 56%" } ], "clip_score": 32.25, "loss": 2.3283934593200684 }, { "id": "identity_301159", "image": "ScienceQA/images/13060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this text describe the weather or the climate?\nChoices:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 4.0, "loss": 4.582136154174805 }, { "id": "identity_150825", "image": "ScienceQA/images/8848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the typical color of the kernels on a corn cob as shown in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 41.90625, "loss": 2.365863800048828 }, { "id": "identity_259714", "image": "FigureQA/images/22930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the heights of Dark Orange and Gray, and is this difference greater than the height of Blue Violet?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.65625, "loss": 3.5598397254943848 }, { "id": "identity_254792", "image": "TabMWP/images/32387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean number of couches sold per day, and how does it compare to the median?" }, { "from": "gpt", "value": "The answer is Mean is 5.57, median is 6" } ], "clip_score": 33.25, "loss": 2.6172492504119873 }, { "id": "identity_115699", "image": "A-OKVQA/images/244471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the giraffe a real animal or a model?" }, { "from": "gpt", "value": "The answer is Model" } ], "clip_score": 26.1875, "loss": 2.9109394550323486 }, { "id": "identity_121496", "image": "VQA2.0/images/22741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it likely to be in the image?" }, { "from": "gpt", "value": "The answer is It is likely sunset or sunrise" } ], "clip_score": 20.765625, "loss": 2.5297186374664307 }, { "id": "identity_145733", "image": "DocVQA/images/jsnh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Manager of Factory Accounting for the Eastern District?" }, { "from": "gpt", "value": "The answer is RICHARD F. MALLY" } ], "clip_score": 17.75, "loss": 2.272573709487915 }, { "id": "identity_276482", "image": "PlotQA/images/25732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest number of flight takeoffs recorded across all years?" }, { "from": "gpt", "value": "The answer is 9400.0" } ], "clip_score": 24.953125, "loss": 2.831890106201172 }, { "id": "identity_207424", "image": "VQA-AS/images/379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What piece of furniture is the cat sleeping on?" }, { "from": "gpt", "value": "The answer is An armchair" } ], "clip_score": 31.171875, "loss": 3.0710370540618896 }, { "id": "identity_125297", "image": "VQA2.0/images/295162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many elephants are performing in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.1875, "loss": 2.485071897506714 }, { "id": "identity_129351", "image": "VQA2.0/images/119560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many party hats are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.46875, "loss": 4.431149959564209 }, { "id": "identity_97736", "image": "IconQA/images/65843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of dogs to bones?" }, { "from": "gpt", "value": "The answer is 4:3" } ], "clip_score": 27.046875, "loss": 1.787795901298523 }, { "id": "identity_320525", "image": "MapQA/images/map_10713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value in this region?" }, { "from": "gpt", "value": "The answer is 45.9%-51.2%" } ], "clip_score": 21.046875, "loss": 2.2659215927124023 }, { "id": "identity_27272", "image": "VQA-AS/images/11872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What two items are both children holding?" }, { "from": "gpt", "value": "The answer is toys" } ], "clip_score": 23.6875, "loss": 2.7383716106414795 }, { "id": "identity_14465", "image": "ScienceQA/images/12230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following could Britney's test show?\nChoices:\n(A) whether producing more insulin would help the bacteria grow faster\n(B) whether she added enough nutrients to help the bacteria produce 20% more insulin\n(C) whether different types of bacteria would need different nutrients to produce insulin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.171875, "loss": 3.5545527935028076 }, { "id": "identity_307961", "image": "FigureQA/images/25778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.703125, "loss": 3.81048583984375 }, { "id": "identity_269402", "image": "TabMWP/images/34458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Harold requires to purchase both a crane and a dump truck?" }, { "from": "gpt", "value": "The answer is 18,745" } ], "clip_score": 31.171875, "loss": 2.4053118228912354 }, { "id": "identity_188199", "image": "A-OKVQA/images/573973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many speakers are visible on the desk?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 17.625, "loss": 2.8429596424102783 }, { "id": "identity_131749", "image": "A-OKVQA/images/63791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the jersey number of the player in red?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 18.640625, "loss": 2.5150258541107178 }, { "id": "identity_237579", "image": "VizWiz/images/VizWiz_train_00011042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name and the type of product shown in the picture, and how many words are used to describe the type of product?" }, { "from": "gpt", "value": "The answer is After Eight, mint chocolate thins, three" } ], "clip_score": 35.1875, "loss": 2.493206262588501 }, { "id": "identity_157968", "image": "ScienceQA/images/17161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the body of water located between the highlighted country and Australia?" }, { "from": "gpt", "value": "The answer is Torres Strait" } ], "clip_score": 30.984375, "loss": 2.662759780883789 }, { "id": "identity_165877", "image": "Geometry3K/images/621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the figure, if m \\angle 10 = 118, find the measure of \\angle 6" }, { "from": "gpt", "value": "The answer is 118" } ], "clip_score": 27.9375, "loss": 1.8991891145706177 }, { "id": "identity_59736", "image": "FigureQA/images/7174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used in the pie chart?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.40625, "loss": 2.3499889373779297 }, { "id": "identity_104760", "image": "TextVQA/images/96986e85369e6cb0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What city and state is this establishment located in according to the image?" }, { "from": "gpt", "value": "The answer is Bowling Green, Kentucky" } ], "clip_score": 27.6875, "loss": 1.5985183715820312 }, { "id": "identity_322474", "image": "GeoQA+/images/1820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,BD是△ABC的中线,点E,F分别为BD,CE的中点,若△ABC的面积为12.则△AEF的面积是多少?\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.46875, "loss": 2.5599026679992676 }, { "id": "identity_211834", "image": "PMC-VQA/images/PMC6233374_Fig1_395731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the presence of sulci and gyri in the MRI image?" }, { "from": "gpt", "value": "The answer is Yes, sulci and gyri are present" } ], "clip_score": 33.625, "loss": 2.147461414337158 }, { "id": "identity_10308", "image": "MapQA/images/map_10507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border New Mexico , does Colorado have the lowest value ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.59375, "loss": 2.7278366088867188 }, { "id": "identity_105235", "image": "A-OKVQA/images/169576.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any children present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 12.0546875, "loss": 3.1519107818603516 }, { "id": "identity_210521", "image": "VQA-AS/images/6897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation is depicted on the right side of the image?" }, { "from": "gpt", "value": "The answer is Bush or shrub" } ], "clip_score": 21.109375, "loss": 3.6856863498687744 }, { "id": "identity_327758", "image": "GeoQA+/images/1039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,OP平分∠MON,PA垂直于ON于点A,点D是射线OB上的一个动点,若PA=3,则线段PD的最小值为()\nChoices:\n(A) 1\n(B) 2\n(C) 3\n(D) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.734375, "loss": 2.431323289871216 }, { "id": "identity_111589", "image": "TextVQA/images/a2439a8c902b31fd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any image of a vehicle on the magazine?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.546875, "loss": 3.330254077911377 }, { "id": "identity_180567", "image": "TabMWP/images/21969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did Emma watch the least amount of TV?" }, { "from": "gpt", "value": "The answer is Wednesday" } ], "clip_score": 30.375, "loss": 1.9719234704971313 }, { "id": "identity_35173", "image": "IconQA/images/35913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are on the frame?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 33.53125, "loss": 2.270235776901245 }, { "id": "identity_176640", "image": "IconQA/images/21506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are circles but not orange?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.703125, "loss": 2.7362868785858154 }, { "id": "identity_308345", "image": "A-OKVQA/images/443684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the individual on the animal wearing?\nChoices:\n(A) cape\n(B) green pants\n(C) red shirt\n(D) sombrero" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.359375, "loss": 3.910059690475464 }, { "id": "identity_92714", "image": "TabMWP/images/32534.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a new student joins the band and chooses to play an instrument at random, what is the probability that they will choose the clarinet?" }, { "from": "gpt", "value": "The answer is 23/73" } ], "clip_score": 29.875, "loss": 2.5962302684783936 }, { "id": "identity_50401", "image": "DocVQA/images/syjf0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the letter?" }, { "from": "gpt", "value": "The answer is My Concerns about sNDA for Actos 45mg in the USA (Reconfirmation)" } ], "clip_score": 21.78125, "loss": 3.5154073238372803 }, { "id": "identity_94182", "image": "IconQA/images/21404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of marbles in the image?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 28.28125, "loss": 1.7115849256515503 }, { "id": "identity_194756", "image": "PMC-VQA/images/PMC3199062_fig4_112726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the highlighted brain structure?" }, { "from": "gpt", "value": "The answer is Relaying signals and regulating consciousness, sleep, and alertness" } ], "clip_score": 29.484375, "loss": 2.9842422008514404 }, { "id": "identity_184032", "image": "PMC-VQA/images/PMC3116541_F0003_99168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image showing the left or right eye?" }, { "from": "gpt", "value": "The answer is It is not possible to determine" } ], "clip_score": 24.484375, "loss": 3.4458975791931152 }, { "id": "identity_263650", "image": "IconQA/images/24010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is it when Shane's mom is reading before work, as shown on the clock? Is it 7:00 A.M. or 7:00 P.M.?\nChoices:\n(A) 7:00 A.M.\n(B) 7:00 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.265625, "loss": 3.8465309143066406 }, { "id": "identity_175887", "image": "IconQA/images/76902.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of triangle is the base of this shape?" }, { "from": "gpt", "value": "The answer is Equilateral triangle" } ], "clip_score": 34.09375, "loss": 2.4178030490875244 }, { "id": "identity_24078", "image": "MapQA/images/map_6966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which states hav the highest value in the MidWest ?" }, { "from": "gpt", "value": "The answer is South Dakota" } ], "clip_score": 33.34375, "loss": 2.282860517501831 }, { "id": "identity_276031", "image": "A-OKVQA/images/390086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What component of this dish sets it apart from the rest? Is it:\n(A) parsley\n(B) olives\n(C) vegetables\n(D) meat" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.296875, "loss": 4.154606342315674 }, { "id": "identity_1623", "image": "VQA2.0/images/446694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of bread are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.421875, "loss": 2.2503509521484375 }, { "id": "identity_277064", "image": "FigureQA/images/66744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.265625, "loss": 4.080676555633545 }, { "id": "identity_326159", "image": "IconQA/images/71974.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of these are triangles?\nChoices:\n(A) 3/10\n(B) 6/7\n(C) 3/6\n(D) 4/10" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.859375, "loss": 3.4991562366485596 }, { "id": "identity_221794", "image": "A-OKVQA/images/80260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the motorcycle in motion?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.453125, "loss": 2.676414966583252 }, { "id": "identity_171181", "image": "TabMWP/images/27164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units does the value of Y decrease for every unit increase in X?" }, { "from": "gpt", "value": "The answer is 9 units" } ], "clip_score": 30.0, "loss": 2.877436876296997 }, { "id": "identity_219681", "image": "A-OKVQA/images/20391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is parked next to the building with the black sign?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 26.25, "loss": 2.6242663860321045 }, { "id": "identity_291039", "image": "PMC-VQA/images/PMC2829589_F1_58106.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which area of the thalamus is included within the radiation field?\nChoices:\n(A) Right thalamus lateral part\n(B) Left thalamus lateral part\n(C) Right thalamus medial part\n(D) Left thalamus medial part." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.8125, "loss": 3.5704987049102783 }, { "id": "identity_95621", "image": "IconQA/images/73042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you were to arrange the triangles to form a rectangle, what would be the number of triangles on the longer side of the rectangle?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.515625, "loss": 2.1674563884735107 }, { "id": "identity_70065", "image": "ScienceQA/images/15946.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distinctive feature of the animal's hands that helps it to grip tree branches?" }, { "from": "gpt", "value": "The answer is Two opposable digits (thumbs)" } ], "clip_score": 20.515625, "loss": 2.976691246032715 }, { "id": "identity_53429", "image": "DocVQA/images/gfhb0018_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the hotel mentioned for the event?" }, { "from": "gpt", "value": "The answer is Stouffer Tower City Plaza Hotel" } ], "clip_score": 17.1875, "loss": 3.076094388961792 }, { "id": "identity_38341", "image": "DocVQA/images/rrcw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page no mentioned in this document?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 24.421875, "loss": 2.9848220348358154 }, { "id": "identity_100437", "image": "TabMWP/images/13911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Whitney had not received the $15.00 on 10/22, what would her available funds be?" }, { "from": "gpt", "value": "The answer is $169.15" } ], "clip_score": 30.21875, "loss": 2.129779577255249 }, { "id": "identity_15398", "image": "ScienceQA/images/5905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Complete the statement.\nBromomethane is ().\nChoices:\n(A) an elementary substance\n(B) a compound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.53125, "loss": 3.0666842460632324 }, { "id": "identity_153593", "image": "ScienceQA/images/19540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state that is directly east of Nevada?" }, { "from": "gpt", "value": "The answer is Salt Lake City, Utah" } ], "clip_score": 26.375, "loss": 1.6866559982299805 }, { "id": "identity_64194", "image": "AI2D/images/4301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the large igneous bodies represented by the label \"Batholiths\"?" }, { "from": "gpt", "value": "The answer is Batholiths" } ], "clip_score": 26.71875, "loss": 1.7453906536102295 }, { "id": "identity_53713", "image": "FigureQA/images/46944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Dark Red section the largest in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 35.1875, "loss": 3.4673714637756348 }, { "id": "identity_118939", "image": "VQA-AS/images/11266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many evergreen trees are there in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.0, "loss": 2.452897310256958 }, { "id": "identity_328189", "image": "PMC-VQA/images/PMC4908129_F5_41398.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was linked with VBM?\nChoices:\n(A) CT scan\n(B) MRI\n(C) PET\n(D) SPECT" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.859375, "loss": 3.7062859535217285 }, { "id": "identity_186717", "image": "TextVQA/images/24a5986578a16f29.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bridge primarily painted in?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.234375, "loss": 3.262880802154541 }, { "id": "identity_212887", "image": "VQA2.0/images/363191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the refrigerator's interior light?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.203125, "loss": 3.253798723220825 }, { "id": "identity_7172", "image": "VQA2.0/images/425721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bowls are the bananas in?" }, { "from": "gpt", "value": "The answer is foil" } ], "clip_score": 29.265625, "loss": 4.366422176361084 }, { "id": "identity_36971", "image": "UniGeo/images/715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC中,∠B=90°,AB=1,BC=2,则sinA=()\nChoices:\n(A) \\frac{√{5}}{2}\n(B) \\frac{1}{2}\n(C) \\frac{2√{5}}{5}\n(D) \\frac{√{5}}{5}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.453125, "loss": 2.7337002754211426 }, { "id": "identity_150362", "image": "ScienceQA/images/20726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the north?" }, { "from": "gpt", "value": "The answer is Oregon" } ], "clip_score": 34.875, "loss": 3.3597919940948486 }, { "id": "identity_176999", "image": "TabMWP/images/29751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Jared need to buy a history book and a crossword puzzle book?" }, { "from": "gpt", "value": "The answer is $39" } ], "clip_score": 32.5625, "loss": 2.337268829345703 }, { "id": "identity_24753", "image": "GeoQA+/images/2376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,P为⊙O外一点,PA、PB分别切⊙O于A、B,CD切⊙O于点E,分别交PA、PB于点C、D,若PA=15,则△PCD的周长为()\nChoices:\n(A) 15\n(B) 30\n(C) 18\n(D) 25" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.140625, "loss": 2.376520872116089 }, { "id": "identity_23545", "image": "GeoQA+/images/2502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知1号,4号两个正方形的面积和为7,2号,3号两个正方形的而积和为4,则a,b,c三个方形的面积和为()\nChoices:\n(A) 10\n(B) 13\n(C) 15\n(D) 22" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.390625, "loss": 2.557833433151245 }, { "id": "identity_149028", "image": "MapQA/images/map_9469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of liberals in the lightest shaded states?" }, { "from": "gpt", "value": "The answer is 852890-854315" } ], "clip_score": 29.234375, "loss": 2.1951894760131836 }, { "id": "identity_168664", "image": "IconQA/images/20738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the camera icon represents one unit, how many units wide is the white rectangle?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.265625, "loss": 2.194296360015869 }, { "id": "identity_101608", "image": "TabMWP/images/34636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in weight between the heaviest and lightest winning dogs?" }, { "from": "gpt", "value": "The answer is 14 kg" } ], "clip_score": 36.5, "loss": 2.3620007038116455 }, { "id": "identity_211887", "image": "A-OKVQA/images/110105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the top of the sign in the background?" }, { "from": "gpt", "value": "The answer is Småtrollcupen" } ], "clip_score": 21.75, "loss": 4.767800807952881 }, { "id": "identity_11201", "image": "PMC-VQA/images/PMC7788113_Fig1_336002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lung tumor located in the CT images?\nChoices:\n(A) Right S6 segment. \n(B) Left S6 segment. \n(C) B1 + 2 segment. \n(D) S1 + 2 segment." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.84375, "loss": 4.171874523162842 }, { "id": "identity_253119", "image": "IconQA/images/35174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line is extended by 50% of its current length, what will be the new length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 31.1875, "loss": 2.97528338432312 }, { "id": "identity_133522", "image": "TextVQA/images/2235bca399bfd395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is advertised on the side of the blue double-decker bus?" }, { "from": "gpt", "value": "The answer is within ten words" } ], "clip_score": 17.859375, "loss": 4.5962700843811035 }, { "id": "identity_123579", "image": "A-OKVQA/images/50960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many biscuits are there on the orange plate?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.859375, "loss": 4.399588108062744 }, { "id": "identity_59435", "image": "FigureQA/images/6190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Bubblegum in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 50%" } ], "clip_score": 33.59375, "loss": 2.3051421642303467 }, { "id": "identity_27420", "image": "Super-CLEVR/images/superCLEVR_new_001949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many other things are there of the same material as the yellow suv?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 19.734375, "loss": 2.5286662578582764 }, { "id": "identity_124555", "image": "PMC-VQA/images/PMC5646889_F2_228583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible difference in density between the lesion and the surrounding brain tissue?" }, { "from": "gpt", "value": "The answer is Yes, the lesion is lighter in color" } ], "clip_score": 32.90625, "loss": 2.116133689880371 }, { "id": "identity_267315", "image": "VQA-AS/images/9164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is their plan of action?" }, { "from": "gpt", "value": "The answer is have cheese and wine" } ], "clip_score": 24.328125, "loss": 4.258848667144775 }, { "id": "identity_105987", "image": "A-OKVQA/images/197329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many zebras are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.53125, "loss": 4.562018394470215 }, { "id": "identity_127022", "image": "VQA-RAD/images/synpic33422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lung fields clear of any infiltrates or masses in this chest X-ray?" }, { "from": "gpt", "value": "The answer is Yes, the lung fields are clear" } ], "clip_score": 32.75, "loss": 1.9917149543762207 }, { "id": "identity_298714", "image": "DocVQA/images/mhvd0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is referenced in this document?" }, { "from": "gpt", "value": "The answer is October 30, 1967" } ], "clip_score": 25.015625, "loss": 1.870348334312439 }, { "id": "identity_196411", "image": "A-OKVQA/images/67966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant topping on the pizza?" }, { "from": "gpt", "value": "The answer is Pepperoni" } ], "clip_score": 26.53125, "loss": 2.6769216060638428 }, { "id": "identity_93525", "image": "IconQA/images/28008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is divided into equal sections, how many sections would there be?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.546875, "loss": 1.9802981615066528 }, { "id": "identity_306362", "image": "MapQA/images/map_3693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value here?" }, { "from": "gpt", "value": "The answer is 67.6%-69.2%" } ], "clip_score": 19.4375, "loss": 2.04338002204895 }, { "id": "identity_225106", "image": "A-OKVQA/images/175831.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From which floor's balcony could someone get the most optimal view of the flag and the entire front facade of the building, considering the height and angle of view?" }, { "from": "gpt", "value": "The answer is fourth" } ], "clip_score": 24.84375, "loss": 4.717343330383301 }, { "id": "identity_6038", "image": "GeoQA+/images/5186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,圆锥模具的母线长为10,底面半径为5,该圆锥模具的侧面积是()\nChoices:\n(A) 10π\n(B) 50π\n(C) 100π\n(D) 150π" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.4375, "loss": 2.880730628967285 }, { "id": "identity_118597", "image": "VQA2.0/images/375201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the table or surface the vase is placed on?" }, { "from": "gpt", "value": "The answer is Brown or wooden" } ], "clip_score": 25.75, "loss": 3.688464641571045 }, { "id": "identity_310484", "image": "ScienceQA/images/14398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could Anita's test reveal?\nChoices:\n(A) if the blade guards would break in a crash\n(B) how much the drone weighed with the blade guards\n(C) if adding the blade guards made the drone fly poorly" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.15625, "loss": 3.743593692779541 }, { "id": "identity_185911", "image": "VQA2.0/images/279009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the registration number of the plane?" }, { "from": "gpt", "value": "The answer is N177X" } ], "clip_score": 25.609375, "loss": 1.905240774154663 }, { "id": "identity_237908", "image": "ScienceQA/images/8524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the left magnet is rotated 180 degrees, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is attract" } ], "clip_score": 21.25, "loss": 3.1716742515563965 }, { "id": "identity_85371", "image": "UniGeo/images/2444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment DB?" }, { "from": "gpt", "value": "The answer is 16 meters" } ], "clip_score": 27.375, "loss": 3.191749334335327 }, { "id": "identity_56483", "image": "FigureQA/images/39854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bar representing Sienna shorter than the bar representing Indian Red?" }, { "from": "gpt", "value": "The answer is No, they are the same length" } ], "clip_score": 30.171875, "loss": 2.7273194789886475 }, { "id": "identity_207286", "image": "VizWiz/images/VizWiz_train_00013440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the birds' feathers?" }, { "from": "gpt", "value": "The answer is Brown and white" } ], "clip_score": 22.28125, "loss": 3.559702157974243 }, { "id": "identity_179723", "image": "TabMWP/images/18728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of roller coasters per amusement park?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 24.59375, "loss": 2.7906477451324463 }, { "id": "identity_145199", "image": "DocVQA/images/kgpx0078_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the event reported?" }, { "from": "gpt", "value": "The answer is Jiffy Lube Miami 300" } ], "clip_score": 22.375, "loss": 2.637728452682495 }, { "id": "identity_171394", "image": "TabMWP/images/35366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the donut shop had a goal to sell at least 500 donuts each day, on which days did they fail to meet their goal?" }, { "from": "gpt", "value": "The answer is Saturday, Sunday, Monday, Tuesday" } ], "clip_score": 35.90625, "loss": 1.6179511547088623 }, { "id": "identity_104180", "image": "PMC-VQA/images/PMC5641062_F4_226991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is shown in the image?" }, { "from": "gpt", "value": "The answer is Skeletal muscle tissue" } ], "clip_score": 18.875, "loss": 3.087960958480835 }, { "id": "identity_182818", "image": "PMC-VQA/images/PMC5203875_fig5_109122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image showing a normal anatomical structure or a pathological condition?" }, { "from": "gpt", "value": "The answer is Normal anatomical structure" } ], "clip_score": 24.1875, "loss": 2.119649887084961 }, { "id": "identity_58586", "image": "FigureQA/images/15273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the graph have a title?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.75, "loss": 3.1851305961608887 }, { "id": "identity_231237", "image": "CLEVR-Math/images/CLEVR_train_012685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 7 blocks to the existing shapes, how many shapes will there be in total?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 22.015625, "loss": 2.188260316848755 }, { "id": "identity_8523", "image": "ScienceQA/images/1799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fish's mouth is also adapted for tearing through meat?\nChoices:\n(A) parrotfish\n(B) starry moray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.328125, "loss": 4.401841163635254 }, { "id": "identity_160484", "image": "Geometry3K/images/1035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle adjacent to the angle measuring 115°?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 27.609375, "loss": 2.012474536895752 }, { "id": "identity_151801", "image": "ScienceQA/images/9183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state borders the highlighted state to the east?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 33.375, "loss": 2.841977119445801 }, { "id": "identity_171950", "image": "IconQA/images/64765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the height of the letter 'F' in grid squares after transformation?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.09375, "loss": 2.4644579887390137 }, { "id": "identity_97529", "image": "IconQA/images/67462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the stars are in the bottom row?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 24.703125, "loss": 1.7210224866867065 }, { "id": "identity_85184", "image": "Geometry3K/images/1549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If m \\angle 1 = 50 and m \\angle 3 = 60, find the measure of \\angle 5" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 28.421875, "loss": 2.318453550338745 }, { "id": "identity_27070", "image": "UniGeo/images/2354.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,CD为Rt△ABC斜边上的高,AC:BC=3:2,如果S~△ADC~=9,那么S~△BDC~等于()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.140625, "loss": 2.6796658039093018 }, { "id": "identity_203270", "image": "A-OKVQA/images/505469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's feathers?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.421875, "loss": 2.8088226318359375 }, { "id": "identity_201242", "image": "VQA2.0/images/474550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flavors of doughnuts are listed on the menu board?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 29.34375, "loss": 4.093553066253662 }, { "id": "identity_17379", "image": "DVQA/images/bar_train_00153852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart presented in a percentage scale?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.3125, "loss": 3.8403728008270264 }, { "id": "identity_334151", "image": "TabMWP/images/33599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Pam is checking the prices of new cell phones online. Her favorite company, OrangeTech, has a special this weekend. Pam can either add an upgrade to a phone for an additional cost or buy a used phone to get a discount. The change in price for each option is shown in the table. Which option results in a larger change in price?\nChoices:\n(A) adding an upgrade\n(B) buying a used phone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.078125, "loss": 3.258368730545044 }, { "id": "identity_110933", "image": "VQA2.0/images/54893.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the bird's wingtips?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.546875, "loss": 4.191312789916992 }, { "id": "identity_194411", "image": "PMC-VQA/images/PMC3834309_pone-0080939-g005_244798.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the marked region labeled as number 2?" }, { "from": "gpt", "value": "The answer is Irregular shape" } ], "clip_score": 16.1875, "loss": 4.2131242752075195 }, { "id": "identity_219251", "image": "A-OKVQA/images/500396.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the railway company operating the train?" }, { "from": "gpt", "value": "The answer is ÖBB (Austrian Federal Railways)" } ], "clip_score": 30.703125, "loss": 1.2741689682006836 }, { "id": "identity_107915", "image": "VQA-AS/images/11789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the yarn the cat is playing with?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.640625, "loss": 2.9581851959228516 }, { "id": "identity_33266", "image": "A-OKVQA/images/554729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does the luggage on this belt come from?\nChoices:\n(A) bathroom\n(B) airplane\n(C) store\n(D) home" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.660214424133301 }, { "id": "identity_312163", "image": "UniGeo/images/948.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,∠ACB是⊙O的弧AB所对的圆周角,点P是⊙O的割线m上任意一点,线段PA,PB的中点分别为D,E,若⊙O的半径为8cm,∠ACB=30°,则线段DE的长为多少?\n选项:\n(A) 2cm\n(B) 4cm\n(C) 4√{2}cm\n(D) 4√{3}cm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.375, "loss": 2.547457218170166 }, { "id": "identity_131522", "image": "VQA2.0/images/293589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man doing with his right hand?" }, { "from": "gpt", "value": "The answer is Waving" } ], "clip_score": 15.1875, "loss": 2.6593854427337646 }, { "id": "identity_258351", "image": "IconQA/images/85394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the positions of all the stars in the sequence?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 24.890625, "loss": 1.9303771257400513 }, { "id": "identity_295625", "image": "DocVQA/images/kslw0227_25.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title located at the top of the page?" }, { "from": "gpt", "value": "The answer is Electrolytes in Normal Pregnancy" } ], "clip_score": 20.53125, "loss": 1.3969494104385376 }, { "id": "identity_2790", "image": "PMC-VQA/images/PMC6717321_Fig2_30960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being measured in the postoperative radiographs? \nChoices:\n(A) Hip joint space \n(B) Leg length shortening \n(C) Bone density \n(D) Spinal curvature " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.71875, "loss": 3.859344482421875 }, { "id": "identity_20520", "image": "A-OKVQA/images/178666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does the Pompeii exhibit end?\nChoices:\n(A) 2010\n(B) 2011\n(C) 2012\n(D) 2013" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.4609375, "loss": 3.4847629070281982 }, { "id": "identity_191819", "image": "A-OKVQA/images/351422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are holding umbrellas?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.59375, "loss": 2.330427885055542 }, { "id": "identity_133496", "image": "TextVQA/images/1a1dc4f649c70562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest it is?" }, { "from": "gpt", "value": "The answer is Nighttime" } ], "clip_score": 14.2421875, "loss": 3.442000389099121 }, { "id": "identity_67526", "image": "AI2D/images/3530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the \"glycogen granule\" in the Typical Animal Cell?" }, { "from": "gpt", "value": "The answer is Energy storage" } ], "clip_score": 26.78125, "loss": 3.1525838375091553 }, { "id": "identity_220451", "image": "A-OKVQA/images/174185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible signage on the building behind the carriage?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.09375, "loss": 2.6964712142944336 }, { "id": "identity_197730", "image": "TextVQA/images/5d782a53ff52845c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the border of the poster?" }, { "from": "gpt", "value": "The answer is Maroon" } ], "clip_score": 22.109375, "loss": 3.3373186588287354 }, { "id": "identity_183182", "image": "Super-CLEVR/images/superCLEVR_new_000932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have wheels?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 24.046875, "loss": 2.6317455768585205 }, { "id": "identity_181927", "image": "CLEVR-Math/images/CLEVR_train_028445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove the largest object, how many objects remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.0, "loss": 1.9302560091018677 }, { "id": "identity_146017", "image": "DocVQA/images/ssjw0227_35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the photographer who took the photographs on page 17?" }, { "from": "gpt", "value": "The answer is Tony Rollo, Newspak" } ], "clip_score": 23.5, "loss": 4.821590900421143 }, { "id": "identity_234921", "image": "IconQA/images/90663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many triangular faces does this shape have, and what is the total number of faces?" }, { "from": "gpt", "value": "The answer is 4 triangular faces, 4 total faces" } ], "clip_score": 30.921875, "loss": 2.1938955783843994 }, { "id": "identity_322650", "image": "A-OKVQA/images/332463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the water cooler sitting behind something in the center of the room?\nChoices:\n(A) brown\n(B) white\n(C) blue\n(D) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.609375, "loss": 3.9106249809265137 }, { "id": "identity_323603", "image": "DVQA/images/bar_train_00048541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store is represented by the darkorange color?" }, { "from": "gpt", "value": "The answer is change" } ], "clip_score": 25.296875, "loss": 4.338281631469727 }, { "id": "identity_325608", "image": "A-OKVQA/images/559483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person in the yellow shirt on the mountain?\nChoices:\n(A) to hike\n(B) to eat\n(C) to ski\n(D) to sleep" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.828125, "loss": 3.9629111289978027 }, { "id": "identity_247951", "image": "TabMWP/images/2334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Reggie need to buy two cans of cat food, a dog toy, and a cat toy?" }, { "from": "gpt", "value": "The answer is $9.64" } ], "clip_score": 32.90625, "loss": 2.0243067741394043 }, { "id": "identity_291329", "image": "A-OKVQA/images/503832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is closest to the giraffe: baby eel, hat, zebra, or baby?\nChoices:\n(A) baby eel\n(B) hat\n(C) zebra\n(D) baby" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.078125, "loss": 4.029810905456543 }, { "id": "identity_121680", "image": "VQA2.0/images/298777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a fire extinguisher in the room?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.765625, "loss": 3.0420660972595215 }, { "id": "identity_269494", "image": "IconQA/images/68092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) unlikely\n(B) certain\n(C) probable\n(D) impossible" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.640625, "loss": 3.5088279247283936 }, { "id": "identity_296162", "image": "AI2D/images/4593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term describes projections resembling the teeth of a comb among the following options: Elbowed, Lamellate, Serrate, or Pectinate?\nChoices:\n(A) Elbowed\n(B) Lamellate\n(C) Serrate\n(D) Pectinate" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.875, "loss": 3.966513156890869 }, { "id": "identity_201095", "image": "A-OKVQA/images/117781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of hats are the majority of the people wearing?" }, { "from": "gpt", "value": "The answer is Conical straw hats" } ], "clip_score": 21.953125, "loss": 2.549464702606201 }, { "id": "identity_11580", "image": "ScienceQA/images/15915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific humidity level was measured within the outlined area shown?\nChoices:\n(A) 16 grams of water vapor per kilogram of air\n(B) 11 grams of water vapor per kilogram of air\n(C) 21 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.7421875, "loss": 4.110676288604736 }, { "id": "identity_77146", "image": "ScienceQA/images/13572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the desert located on the western coast of the highlighted continent?" }, { "from": "gpt", "value": "The answer is Atacama Desert" } ], "clip_score": 24.421875, "loss": 1.8412835597991943 }, { "id": "identity_150632", "image": "ScienceQA/images/5326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the object in the image change its shape without external force?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.765625, "loss": 3.0786914825439453 }, { "id": "identity_108377", "image": "TextVQA/images/061c4ad060db05ca.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many overs have been bowled in the match so far?" }, { "from": "gpt", "value": "The answer is 27.1" } ], "clip_score": 26.125, "loss": 1.639392375946045 }, { "id": "identity_9081", "image": "PMC-VQA/images/PMC7350248_ijms-21-04518-f001_210224.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color of the blood vessels? \nChoices:\n(A) red \n(B) green \n(C) blue \n(D) yellow " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.6875, "loss": 4.726572036743164 }, { "id": "identity_215378", "image": "A-OKVQA/images/265243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest it was taken?" }, { "from": "gpt", "value": "The answer is Nighttime" } ], "clip_score": 17.875, "loss": 3.1295931339263916 }, { "id": "identity_238833", "image": "DocVQA/images/gncb0054_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of all sessions before lunch?" }, { "from": "gpt", "value": "The answer is 2.75 hours" } ], "clip_score": 20.1875, "loss": 1.790968894958496 }, { "id": "identity_67216", "image": "TQA/images/question_images/seasons_2625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During which Earth's position does the Northern Hemisphere experience the longest day of the year?" }, { "from": "gpt", "value": "The answer is 21 June solstice" } ], "clip_score": 36.09375, "loss": 1.7614578008651733 }, { "id": "identity_12366", "image": "TextVQA/images/7bf2efb3f6d4c96a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many cups?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 22.359375, "loss": 1.3102178573608398 }, { "id": "identity_301040", "image": "GeoQA+/images/3433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点E、F分别为BD、CE的中点,若△AEF的面积为3,则△ABC的面积是多少?\n选项:\n(A) 9\n(B) 10\n(C) 11\n(D) 12" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.75, "loss": 2.4036288261413574 }, { "id": "identity_186135", "image": "A-OKVQA/images/123555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the traffic light for the vehicles in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 20.90625, "loss": 2.5785021781921387 }, { "id": "identity_202443", "image": "PMC-VQA/images/PMC6038976_F2_340196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these images from the same patient?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.953125, "loss": 2.8188908100128174 }, { "id": "identity_26569", "image": "TQA/images/question_images/waves_9292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the measurement from the resting position of particles or magnetic fields to the wave crest called?\nChoices:\n(A) elevation\n(B) amplitude\n(C) wavelength\n(D) trough" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.3125, "loss": 3.679521083831787 }, { "id": "identity_315037", "image": "DocVQA/images/htdw0217_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the email?" }, { "from": "gpt", "value": "The answer is kmittle@dwrite.com" } ], "clip_score": 15.296875, "loss": 4.190824508666992 }, { "id": "identity_164385", "image": "GeoQA+/images/5224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of the circle, ∠CAB=35°, and the radius of the circle is 5, what is the length of line segment AC?" }, { "from": "gpt", "value": "The answer is 2 * 5 * sin(35°)" } ], "clip_score": 34.875, "loss": 1.6058632135391235 }, { "id": "identity_180785", "image": "IconQA/images/46797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of small cubes between the third and fourth largest blocks?" }, { "from": "gpt", "value": "The answer is 36" } ], "clip_score": 26.4375, "loss": 2.402007818222046 }, { "id": "identity_211432", "image": "A-OKVQA/images/188955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ski lifts are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.96875, "loss": 2.5013575553894043 }, { "id": "identity_155998", "image": "ScienceQA/images/18762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average final temperature of the salmon on both halves of the grill?" }, { "from": "gpt", "value": "The answer is 50°C" } ], "clip_score": 27.265625, "loss": 1.9035241603851318 }, { "id": "identity_305849", "image": "FigureQA/images/84196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these two lines intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 11.828125, "loss": 3.782992124557495 }, { "id": "identity_45650", "image": "DVQA/images/bar_train_00186136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have a negative value for the blue bars?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.375, "loss": 2.5228664875030518 }, { "id": "identity_165161", "image": "Geometry3K/images/1450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If triangle RST is similar to another triangle XYZ with side XY corresponding to side RS and side XZ corresponding to side RT, and XY = 42, what is the length of side XZ?" }, { "from": "gpt", "value": "The answer is 88" } ], "clip_score": 32.4375, "loss": 2.8337230682373047 }, { "id": "identity_265857", "image": "AI2D/images/2147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which stage do Larvae develop into polyps? \nChoices:\n(A) 1st\n(B) 3rd\n(C) 4th\n(D) 2nd" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.984375, "loss": 4.171139717102051 }, { "id": "identity_337632", "image": "DocVQA/images/jqnm0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is mentioned as the reporter?" }, { "from": "gpt", "value": "The answer is JOHANNA DWYER" } ], "clip_score": 21.5625, "loss": 2.8579471111297607 }, { "id": "identity_183901", "image": "VQA-AS/images/18593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many living beings are present in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.765625, "loss": 2.1563103199005127 }, { "id": "identity_165640", "image": "Geometry3K/images/464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the chord GH if the radius of the circle is 10 units?" }, { "from": "gpt", "value": "The answer is 16.87 units" } ], "clip_score": 37.34375, "loss": 1.9687248468399048 }, { "id": "identity_198623", "image": "PMC-VQA/images/PMC3418000_F2_149475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical region is primarily shown in these CT scan images?" }, { "from": "gpt", "value": "The answer is The head, specifically the brain region" } ], "clip_score": 27.359375, "loss": 3.5538933277130127 }, { "id": "identity_241210", "image": "Super-CLEVR/images/superCLEVR_new_001987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many things are either red rubber road bikes or metal vehicles?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.3125, "loss": 2.3851420879364014 }, { "id": "identity_40499", "image": "FigureQA/images/35417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Dark Orange the smallest segment in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 38.78125, "loss": 3.113987445831299 }, { "id": "identity_170980", "image": "TabMWP/images/10571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of three beautiful yellow shells and one scallop shell?" }, { "from": "gpt", "value": "The answer is $0.11" } ], "clip_score": 40.15625, "loss": 1.983561635017395 }, { "id": "identity_204981", "image": "VizWiz/images/VizWiz_train_00005917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What trademark symbol is visible next to the brand logo?" }, { "from": "gpt", "value": "The answer is TM (Trademark)" } ], "clip_score": 18.46875, "loss": 1.481645941734314 }, { "id": "identity_16857", "image": "TQA/images/abc_question_images/earth_eclipses_10681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the Umbra?\nChoices:\n(A) K\n(B) U\n(C) D\n(D) V" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.453125, "loss": 4.567019462585449 }, { "id": "identity_191303", "image": "PMC-VQA/images/PMC7035375_Fig1_122618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color scale indicating at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Intensity or activity level in the brain" } ], "clip_score": 26.75, "loss": 2.6551084518432617 }, { "id": "identity_285643", "image": "PMC-VQA/images/PMC5833877_fig2_282964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized in this patient? \nOptions:\n(A) PET scan\n(B) MRI scan\n(C) CT scan\n(D) X-Ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.8125, "loss": 3.8718528747558594 }, { "id": "identity_318116", "image": "DVQA/images/bar_train_00178555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the smallest individual bar in this chart?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.40625, "loss": 1.9447568655014038 }, { "id": "identity_221920", "image": "A-OKVQA/images/477343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the jacket of the person closest to the man in red?" }, { "from": "gpt", "value": "The answer is Blue and white" } ], "clip_score": 15.7578125, "loss": 4.107105731964111 }, { "id": "identity_21909", "image": "PMC-VQA/images/PMC7738134_F1_321312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the condition of the patient's intraoral mucosa observed in the image?\nChoices:\n(A) Intact \n(B) Swollen \n(C) Blistered \n(D) Ruptured" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.90625, "loss": 3.9244911670684814 }, { "id": "identity_121697", "image": "A-OKVQA/images/376590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are watching the man perform?" }, { "from": "gpt", "value": "The answer is An integer number representing the count of people" } ], "clip_score": 12.984375, "loss": 3.415799379348755 }, { "id": "identity_67387", "image": "ScienceQA/images/679.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary ingredient used to make the object shown in the image?" }, { "from": "gpt", "value": "The answer is Potatoes" } ], "clip_score": 25.71875, "loss": 3.138535261154175 }, { "id": "identity_10249", "image": "FigureQA/images/83994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rosy Brown greater than Lime Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.671875, "loss": 3.936363935470581 }, { "id": "identity_54017", "image": "DVQA/images/bar_train_00089586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference percentage between 'stop' and 'eye' in the 'yard' category?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 26.4375, "loss": 2.105159282684326 }, { "id": "identity_160472", "image": "Geometry3K/images/1604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the degree measures of minor arc AC and major arc ADC?" }, { "from": "gpt", "value": "The answer is 360" } ], "clip_score": 29.203125, "loss": 2.019111156463623 }, { "id": "identity_47810", "image": "FigureQA/images/98811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value of Medium Blue on the y-axis?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 28.15625, "loss": 2.526171922683716 }, { "id": "identity_65420", "image": "ScienceQA/images/1559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete flowers can be counted in the image?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 22.390625, "loss": 3.538818836212158 }, { "id": "identity_2248", "image": "VQA2.0/images/131580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of shoes is she wearing?" }, { "from": "gpt", "value": "The answer is sandals" } ], "clip_score": 22.28125, "loss": 2.876347303390503 }, { "id": "identity_89723", "image": "Geometry3K/images/1695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If y = 20 degrees, what is the value of x for the quadrilateral to be a parallelogram?" }, { "from": "gpt", "value": "The answer is x = 0 degrees" } ], "clip_score": 34.8125, "loss": 1.9034031629562378 }, { "id": "identity_79173", "image": "UniGeo/images/3102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle A in the given triangle?" }, { "from": "gpt", "value": "The answer is 51 degrees" } ], "clip_score": 34.4375, "loss": 2.471014976501465 }, { "id": "identity_121754", "image": "TextVQA/images/65df7f90b9cef4a6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times is the name \"Edgar\" mentioned on both pages?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 17.546875, "loss": 1.9662402868270874 }, { "id": "identity_56696", "image": "DocVQA/images/yznh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who suggested calling the governor of Pueblo Cochiti?" }, { "from": "gpt", "value": "The answer is Joe Quintana" } ], "clip_score": 24.71875, "loss": 3.6738250255584717 }, { "id": "identity_18099", "image": "Geometry3K/images/1248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Triangle W X Y has an area of 248 square inches. Find the length of the base.\nChoices:\n(A) 15.5\n(B) 16\n(C) 31\n(D) 32" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.0, "loss": 2.9567134380340576 }, { "id": "identity_148689", "image": "DocVQA/images/myxd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ZIP code for Duke University Medical Center?" }, { "from": "gpt", "value": "The answer is 27710" } ], "clip_score": 21.140625, "loss": 1.3413124084472656 }, { "id": "identity_47093", "image": "DocVQA/images/qhxn0020_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the retail partner mentioned in the contract?" }, { "from": "gpt", "value": "The answer is Cumberland Farms" } ], "clip_score": 24.046875, "loss": 2.7448554039001465 }, { "id": "identity_34652", "image": "DVQA/images/bar_train_00113324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is preferred by the most number of people summed across all the categories?" }, { "from": "gpt", "value": "The answer is site" } ], "clip_score": 32.9375, "loss": 3.4293761253356934 }, { "id": "identity_53948", "image": "DocVQA/images/jhng0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the sender of the letter?" }, { "from": "gpt", "value": "The answer is J. Blumen" } ], "clip_score": 21.90625, "loss": 3.8885622024536133 }, { "id": "identity_283072", "image": "TextVQA/images/beed178ed1267db3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What items are available for free at this event?" }, { "from": "gpt", "value": "The answer is beer and food" } ], "clip_score": 9.4921875, "loss": 1.9345989227294922 }, { "id": "identity_307390", "image": "PMC-VQA/images/PMC5980103_Fig3_322321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by this in the image?\nChoices:\n(A) dental pulp \n(B) amelogenin \n(C) enamel \n(D) epithelial pearls" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.171875, "loss": 4.080502986907959 }, { "id": "identity_174029", "image": "TabMWP/images/15305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does 1 kilogram of Swiss chard cost compared to 1 kilogram of romaine lettuce?" }, { "from": "gpt", "value": "The answer is $1" } ], "clip_score": 28.40625, "loss": 2.2188923358917236 }, { "id": "identity_322505", "image": "GeoQA+/images/6011.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,C、D在圆上,且∠BAC=28°,则∠ADC是多少?\nChoices:\n(A) 52°\n(B) 56°\n(C) 62°\n(D) 72°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.109375, "loss": 2.42421293258667 }, { "id": "identity_253790", "image": "A-OKVQA/images/560943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential safety benefits of having a barrier near the curb during a racing event, and how might it impact both participants and spectators?" }, { "from": "gpt", "value": "The answer is Enhanced safety for participants and spectators, reduced accidents and injuries" } ], "clip_score": 28.15625, "loss": 2.9542460441589355 }, { "id": "identity_230537", "image": "GeoQA+/images/4010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, 圆O与正方形ABCD的两边AB、AD相切, 且DE与圆O相切于E点.若圆O的半径为5, 且AB=11, 求DE的长度以及正方形ABCD的对角线AC的长度之和。" }, { "from": "gpt", "value": "The answer is \\( \\sqrt{47} + 11\\sqrt{2} \\)" } ], "clip_score": 26.78125, "loss": 2.0705645084381104 }, { "id": "identity_177523", "image": "TabMWP/images/18250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much would it cost to buy 2 pounds of oatmeal raisin cookie dough and 1 pound of chocolate chip cookie dough?" }, { "from": "gpt", "value": "The answer is $19" } ], "clip_score": 28.875, "loss": 2.4755442142486572 }, { "id": "identity_320817", "image": "GeoQA+/images/5752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A、D是⊙O上的两点,BC是直径,若∠D=35°,则∠OCA的度数是多少?\n选项:\n(A) 35°\n(B) 55°\n(C) 65°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 2.7540297508239746 }, { "id": "identity_167207", "image": "TabMWP/images/22847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of pull-ups done by the classmates?" }, { "from": "gpt", "value": "The answer is 63.5" } ], "clip_score": 21.296875, "loss": 2.3859429359436035 }, { "id": "identity_58053", "image": "DocVQA/images/fhng0227_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the copies of the order sent to?" }, { "from": "gpt", "value": "The answer is Lucy Hume and Miss Kaiser James J. Cullen" } ], "clip_score": 24.1875, "loss": 2.356090784072876 }, { "id": "identity_280359", "image": "IconQA/images/65245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of squares present on the frame?\nChoices:\n(A) 8\n(B) 6\n(C) 10\n(D) 2\n(E) 1" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.953125, "loss": 3.5053608417510986 }, { "id": "identity_43158", "image": "FigureQA/images/91176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Bubblegum at the 50 mark on the x-axis?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 29.546875, "loss": 2.6525111198425293 }, { "id": "identity_74811", "image": "ScienceQA/images/16842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What rule can be applied to predict the behavior of the magnets in the image?" }, { "from": "gpt", "value": "The answer is Like poles repel; unlike poles attract" } ], "clip_score": 20.765625, "loss": 2.1330478191375732 }, { "id": "identity_69445", "image": "AI2D/images/1895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two animals are at the same trophic level as the scorpion?" }, { "from": "gpt", "value": "The answer is Lizard and Tarantula" } ], "clip_score": 30.421875, "loss": 1.981101393699646 }, { "id": "identity_198566", "image": "PMC-VQA/images/PMC4464925_F2_396007.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Coronal plane" } ], "clip_score": 26.015625, "loss": 4.197678089141846 }, { "id": "identity_66515", "image": "ScienceQA/images/2196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the bird have any visible leg bands or tags?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.4375, "loss": 2.9609215259552 }, { "id": "identity_103982", "image": "VQA-RAD/images/synpic56061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to produce this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 32.21875, "loss": 2.580890655517578 }, { "id": "identity_280728", "image": "DVQA/images/bar_train_00147122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the lowest total value when all values are added together?" }, { "from": "gpt", "value": "The answer is silver" } ], "clip_score": 26.046875, "loss": 2.9343044757843018 }, { "id": "identity_234587", "image": "VQA2.0/images/521096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman holding in her left hand, and what is she doing with her right hand?" }, { "from": "gpt", "value": "The answer is Holding pastry, eating with fingers" } ], "clip_score": 28.828125, "loss": 3.324730396270752 }, { "id": "identity_91804", "image": "IconQA/images/9502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more birds with stripes on their wings or without?" }, { "from": "gpt", "value": "The answer is Equal number" } ], "clip_score": 28.015625, "loss": 4.128866672515869 }, { "id": "identity_57542", "image": "ChartQA/images/4637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the \"Favorable view of the U.S.\" consistently above 50% throughout the given years?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.875, "loss": 3.4671127796173096 }, { "id": "identity_234440", "image": "DocVQA/images/yrmp0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total budget for 1975-76 excluding N.I.H. Research Funds and N.I.H. Training Grants?" }, { "from": "gpt", "value": "The answer is $2,133,035" } ], "clip_score": 28.5625, "loss": 2.007559061050415 }, { "id": "identity_286992", "image": "VizWiz/images/VizWiz_train_00013240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 15.3125, "loss": 4.264607906341553 }, { "id": "identity_644", "image": "FigureQA/images/13344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.0625, "loss": 3.953895092010498 }, { "id": "identity_36730", "image": "DVQA/images/bar_train_00142473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars are there per group?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 21.90625, "loss": 3.7339305877685547 }, { "id": "identity_213538", "image": "VQA2.0/images/143348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many line judges can be seen on the court?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.78125, "loss": 4.644427299499512 }, { "id": "identity_331725", "image": "VQA2.0/images/146389.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are in the back of the vehicle?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.734375, "loss": 2.3713760375976562 }, { "id": "identity_322137", "image": "A-OKVQA/images/33208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mammal is this sign trying to keep safe by reminding drivers to drive safe?\nChoices:\n(A) human\n(B) dog\n(C) tiger\n(D) elephant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.96875, "loss": 4.262184143066406 }, { "id": "identity_239045", "image": "TextVQA/images/79c6d4bf9082b322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two companies are advertised on the buildings on the right side of the image, and what are their respective industries?" }, { "from": "gpt", "value": "The answer is Rolex (watchmaking) and Commerzbank (banking)" } ], "clip_score": 16.90625, "loss": 2.0970399379730225 }, { "id": "identity_147392", "image": "DocVQA/images/skcv0228_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the letter mentioned in the document?" }, { "from": "gpt", "value": "The answer is August 30, 1979" } ], "clip_score": 24.75, "loss": 1.5200765132904053 }, { "id": "identity_218886", "image": "A-OKVQA/images/172874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the wall closest to the Christmas tree?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.5625, "loss": 3.811943531036377 }, { "id": "identity_227205", "image": "IconQA/images/52694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple shapes are not squares?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.953125, "loss": 2.552323579788208 }, { "id": "identity_60960", "image": "DVQA/images/bar_train_00137793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the \"author\" and \"uncle\" labels?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 25.25, "loss": 2.426813840866089 }, { "id": "identity_119422", "image": "TextVQA/images/6566c7a2f29c2385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the record label that produced the album depicted in the image?" }, { "from": "gpt", "value": "The answer is Bloodshot" } ], "clip_score": 16.6875, "loss": 4.387402057647705 }, { "id": "identity_180211", "image": "TabMWP/images/6612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which farmer has the closest number of sheep to the average?" }, { "from": "gpt", "value": "The answer is Farmer Wendy" } ], "clip_score": 37.78125, "loss": 2.1680028438568115 }, { "id": "identity_933", "image": "AI2D/images/2648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The stages in the solar cycle are labeled with letters. Which stage is shown at A?\nChoices:\n(A) Winter Solstice\n(B) Vernal Equinox\n(C) Autumnal Equinox\n(D) Summer Solstice" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.46875, "loss": 4.00730037689209 }, { "id": "identity_53730", "image": "DVQA/images/bar_train_00086262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of all 'Title' elements across the stacks?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 25.140625, "loss": 2.12941575050354 }, { "id": "identity_55088", "image": "FigureQA/images/80481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value reached by Saddle Brown?" }, { "from": "gpt", "value": "The answer is 85" } ], "clip_score": 21.109375, "loss": 2.5521178245544434 }, { "id": "identity_318108", "image": "GeoQA+/images/3091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,M是△ABC的边BC的中点,AN是△ABC的外角平分线,BN垂直于AN于点N,且AB=4,MN=2.8,那么AC的长是()\n\nChoices:\n(A) 1.2\n(B) 1.4\n(C) 1.6\n(D) 1.8" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.6875, "loss": 2.4159233570098877 }, { "id": "identity_142497", "image": "ChartQA/images/16309028006002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which disorder has the lowest prevalence percentage?" }, { "from": "gpt", "value": "The answer is Eating disorders" } ], "clip_score": 23.484375, "loss": 2.366431474685669 }, { "id": "identity_310037", "image": "ScienceQA/images/16215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which creature is also adapted to blend in among dead leaves?\nChoices:\n(A) Arctic wolf\n(B) plated leaf chameleon" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.03125, "loss": 3.9289138317108154 }, { "id": "identity_293766", "image": "PMC-VQA/images/PMC5312114_F2_128363.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are the linear septations visible in the MRI scan?\nChoices:\n(A) Right lateral ventricle.\n(B) Left medial ventricle.\n(C) Third ventricle.\n(D) Fourth ventricle." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.3125, "loss": 4.118233680725098 }, { "id": "identity_231526", "image": "Super-CLEVR/images/superCLEVR_new_001448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are made of rubber, and what are their colors?" }, { "from": "gpt", "value": "The answer is One object, cyan" } ], "clip_score": 22.15625, "loss": 5.071091651916504 }, { "id": "identity_286860", "image": "DocVQA/images/ttwg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which day of the week is the invitation for?" }, { "from": "gpt", "value": "The answer is wednesday" } ], "clip_score": 15.6875, "loss": 3.286024570465088 }, { "id": "identity_323235", "image": "TabMWP/images/36483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many participants managed to do at least 50 sit-ups but fewer than 70 sit-ups in the competition monitored by Miss Murray?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.59375, "loss": 2.116685152053833 }, { "id": "identity_266053", "image": "AI2D/images/3061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which region corresponds to the cytosol?\nChoices:\n(A) d\n(B) n\n(C) f\n(D) p" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.65625, "loss": 4.0521979331970215 }, { "id": "identity_28434", "image": "TabMWP/images/20805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At Danville Consulting, the head of human resources examined how the number of employees with health care benefits varied in response to policy changes. According to the table, what was the rate of change between 2013 and 2014?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 37.09375, "loss": 3.306365966796875 }, { "id": "identity_14984", "image": "TQA/images/question_images/human_system_eye_6126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the eye is differently colored between people?\nChoices:\n(A) Pupil\n(B) Cornea\n(C) Iris\n(D) Lens" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.828125, "loss": 4.880028247833252 }, { "id": "identity_306971", "image": "TabMWP/images/36022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more Web pages did one person visit compared to another person?" }, { "from": "gpt", "value": "The answer is 591" } ], "clip_score": 30.4375, "loss": 2.554811954498291 }, { "id": "identity_293259", "image": "ScienceQA/images/4840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force towards each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.640625, "loss": 4.1801862716674805 }, { "id": "identity_277491", "image": "Super-CLEVR/images/superCLEVR_new_000983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the big blue car and the tiny school bus on the right side of the small minivan share the same material composition?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 21.546875, "loss": 5.338181972503662 }, { "id": "identity_78189", "image": "TQA/images/abc_question_images/muscle_fiber_17090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure labeled with letter T" }, { "from": "gpt", "value": "The answer is Synaptic vesicles" } ], "clip_score": 28.578125, "loss": 3.0665993690490723 }, { "id": "identity_88865", "image": "GeoQA+/images/3053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment CD when CE is perpendicular to CD and intersects the extension of DB at point E?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 33.0625, "loss": 2.991734266281128 }, { "id": "identity_88496", "image": "UniGeo/images/873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠C = 35°, what is the length of arc AC in the circle with center O and radius r?" }, { "from": "gpt", "value": "The answer is (70/360) * 2πr" } ], "clip_score": 31.65625, "loss": 1.6035406589508057 }, { "id": "identity_24830", "image": "PMC-VQA/images/PMC5954786_Fig2_314999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the image with the red arrows?\nChoices:\n(A) A local relapse\n(B) Metastatic retroperitoneal lymph node \n(C) Both A and B\n(D) Normal lymph node" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.265625, "loss": 4.417730331420898 }, { "id": "identity_318890", "image": "TextVQA/images/06a7f1d5b80884dd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this place?" }, { "from": "gpt", "value": "The answer is boucheries roger" } ], "clip_score": 31.515625, "loss": 2.7968578338623047 }, { "id": "identity_118076", "image": "PMC-VQA/images/PMC7842001_Fig1_355063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any medical hardware or implants in the knee joint?" }, { "from": "gpt", "value": "The answer is No, there are no medical hardware or implants visible" } ], "clip_score": 33.4375, "loss": 1.6401923894882202 }, { "id": "identity_284492", "image": "IconQA/images/28246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this shape have an open or closed structure?\nChoices:\n(A) closed\n(B) open" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.330180644989014 }, { "id": "identity_258426", "image": "IconQA/images/89231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line is extended by 50% of its current length, what will be the new length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.515625, "loss": 3.2551631927490234 }, { "id": "identity_45054", "image": "DocVQA/images/lfgn0226_46.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the employee number of Leanne A Turner?" }, { "from": "gpt", "value": "The answer is 66294" } ], "clip_score": 18.421875, "loss": 2.9401543140411377 }, { "id": "identity_99569", "image": "IconQA/images/90122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many geese are looking to the left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.40625, "loss": 2.3430657386779785 }, { "id": "identity_21755", "image": "PMC-VQA/images/PMC6617098_children-06-00075-f003_6078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormalities are present in the optic nerves? \nChoices:\n(A) thickening and abnormal T1 signal \n(B) thickening and abnormal T2 signal \n(C) thin and abnormal T1 signal \n(D) thin and abnormal T2 signal " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.828125, "loss": 4.164400100708008 }, { "id": "identity_34247", "image": "PMC-VQA/images/PMC8035493_j_med-2021-0261_fig_001_413271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was identified in the T1 images? \nChoices:\n(A) Several high signals indicating ITH \n(B) Several low signals indicating ITH \n(C) Several low signals indicating mass effects \n(D) Several high signals indicating mass effects " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.1875, "loss": 3.9258220195770264 }, { "id": "identity_83152", "image": "UniGeo/images/1767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If OC is the bisector of angle AOB and angle AOC is 75 degrees, what is the measure of angle COB?" }, { "from": "gpt", "value": "The answer is 75 degrees" } ], "clip_score": 32.84375, "loss": 1.8041645288467407 }, { "id": "identity_175925", "image": "IconQA/images/100617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two more stars are added to the frame, how many stars will there be in total?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.90625, "loss": 2.1973283290863037 }, { "id": "identity_143714", "image": "DVQA/images/bar_train_00193511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between 'bond' and 'review' for the category 'head'?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.453125, "loss": 2.3408079147338867 }, { "id": "identity_250405", "image": "TabMWP/images/36098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of watermelons harvested over the past 6 days?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 34.4375, "loss": 2.2979371547698975 }, { "id": "identity_300697", "image": "VQA-AS/images/12377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it a warm day?" }, { "from": "gpt", "value": "The answer is yes it is" } ], "clip_score": 17.109375, "loss": 3.962087392807007 }, { "id": "identity_95276", "image": "TabMWP/images/4056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of bottles collected by the students?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 27.828125, "loss": 4.03154993057251 }, { "id": "identity_237036", "image": "PlotQA/images/8114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many years is the female life expectancy at birth in Lower middle income greater than 52 years and also greater than the female life expectancy at birth in Haiti?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 30.890625, "loss": 2.483337163925171 }, { "id": "identity_157205", "image": "TQA/images/question_images/seasons_2638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the Earth orbit the Sun in the diagram?" }, { "from": "gpt", "value": "The answer is One orbit" } ], "clip_score": 27.0625, "loss": 3.694028615951538 }, { "id": "identity_81432", "image": "GeoQA+/images/145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If M and N are the midpoints of AB and BC respectively in circle O with AB=6 and ∠ACB=45°, what is the perimeter of triangle AMN?" }, { "from": "gpt", "value": "The answer is The perimeter of triangle AMN is 6+√54 or 13.35 (rounded to two decimal places)" } ], "clip_score": 35.625, "loss": 1.765848159790039 }, { "id": "identity_183651", "image": "A-OKVQA/images/511299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.296875, "loss": 4.283134937286377 }, { "id": "identity_162219", "image": "GeoQA+/images/2826.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O, ∠CAB = 25°, and the radius of the circle is 10 units, what is the length of chord CD?" }, { "from": "gpt", "value": "The answer is √(10^2 + 10^2 - 2 * 10 * 10 * cos(130°))" } ], "clip_score": 38.1875, "loss": 1.3769599199295044 }, { "id": "identity_101583", "image": "IconQA/images/41497.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dog images are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.46875, "loss": 2.0682365894317627 }, { "id": "identity_197763", "image": "VizWiz/images/VizWiz_train_00007063.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the tomatoes in this can claimed to be ripened on the vine?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.59375, "loss": 3.2623519897460938 }, { "id": "identity_333883", "image": "VQA2.0/images/401921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this vehicle heading?" }, { "from": "gpt", "value": "The answer is downtown" } ], "clip_score": 19.921875, "loss": 2.9509708881378174 }, { "id": "identity_197879", "image": "A-OKVQA/images/176571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many statues can be seen on the left side of the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 19.421875, "loss": 4.99283504486084 }, { "id": "identity_2835", "image": "TextVQA/images/ba4e957c76519764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what word is in the pink box?" }, { "from": "gpt", "value": "The answer is energie" } ], "clip_score": 26.765625, "loss": 3.5715503692626953 }, { "id": "identity_26768", "image": "TextVQA/images/00f9078d169011a1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what name is on the player in the dugout?" }, { "from": "gpt", "value": "The answer is texas" } ], "clip_score": 24.8125, "loss": 2.7377231121063232 }, { "id": "identity_222715", "image": "Super-CLEVR/images/superCLEVR_new_001753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that is the same color as the airplane?" }, { "from": "gpt", "value": "The answer is Yes, the bicycle" } ], "clip_score": 26.109375, "loss": 2.965585231781006 }, { "id": "identity_298851", "image": "A-OKVQA/images/569889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of bus is the white vehicle? \n(A) tourist bus \n(B) school bus \n(C) double decker \n(D) public bus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.0, "loss": 4.47037410736084 }, { "id": "identity_255832", "image": "A-OKVQA/images/1700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the historical development timeline of the item the person is standing on, and how did it evolve into its modern form in the 20th century?" }, { "from": "gpt", "value": "The answer is Snowboarding evolved significantly from the 1960s to the 20th century" } ], "clip_score": 30.203125, "loss": 1.8417112827301025 }, { "id": "identity_486", "image": "DVQA/images/bar_train_00065422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has highest accuracy for any dataset?" }, { "from": "gpt", "value": "The answer is tea" } ], "clip_score": 31.546875, "loss": 3.206678867340088 }, { "id": "identity_101975", "image": "TabMWP/images/34130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more sheets of paper were used on Friday compared to Tuesday?" }, { "from": "gpt", "value": "The answer is 37 more sheets" } ], "clip_score": 35.875, "loss": 2.425715923309326 }, { "id": "identity_126052", "image": "PMC-VQA/images/PMC6588950_Fig5_497634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the letter associated with this image?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.046875, "loss": 2.959824800491333 }, { "id": "identity_86007", "image": "Geometry3K/images/108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the green figure is reduced by a scale factor of 1/2, what will be the new area?" }, { "from": "gpt", "value": "The answer is 376 cm^2 * 1/4" } ], "clip_score": 32.0625, "loss": 2.471071481704712 }, { "id": "identity_17403", "image": "AI2D/images/1893.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the food web, to the deer population if the grasses dried up in summer?\nChoices:\n(A) Deer population will increase\n(B) Remain unaffected\n(C) Deer population will decrease\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.34375, "loss": 3.8352599143981934 }, { "id": "identity_333480", "image": "PMC-VQA/images/PMC7992970_Fig1_399420.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which surgical procedure had a postoperative image showing necrosis of the femoral head in the left hip?\nChoices:\n(A) SuperPath,\n(B) PLTH,\n(C) Both surgeries,\n(D) Neither surgery" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.96875, "loss": 3.6821815967559814 }, { "id": "identity_58460", "image": "FigureQA/images/54914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Dim Gray more than double the value of Indigo?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.828125, "loss": 3.290119171142578 }, { "id": "identity_127009", "image": "PMC-VQA/images/PMC3948907_Fig2_273076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is used to produce the contrast in the CT scan?" }, { "from": "gpt", "value": "The answer is Contrast-enhanced CT" } ], "clip_score": 32.65625, "loss": 2.222320318222046 }, { "id": "identity_71462", "image": "ScienceQA/images/14879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter represents the North pole on the magnet to the right?" }, { "from": "gpt", "value": "The answer is N" } ], "clip_score": 19.328125, "loss": 2.6451265811920166 }, { "id": "identity_29194", "image": "DocVQA/images/tgwd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the address of Washington University ?" }, { "from": "gpt", "value": "The answer is saint louis" } ], "clip_score": 23.09375, "loss": 3.53328275680542 }, { "id": "identity_214644", "image": "PMC-VQA/images/PMC7564634_cancers-12-02585-f003_275662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence is shown in image (A)?" }, { "from": "gpt", "value": "The answer is T1-weighted MRI sequence" } ], "clip_score": 33.5625, "loss": 1.542775273323059 }, { "id": "identity_187991", "image": "PMC-VQA/images/PMC5585196_F2_211657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What side of the brain is labeled in the image?" }, { "from": "gpt", "value": "The answer is Lateral (left)" } ], "clip_score": 34.125, "loss": 2.547074317932129 }, { "id": "identity_221832", "image": "PMC-VQA/images/PMC7746039_attachment-16569_322514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the midline of the brain shifted in this image?" }, { "from": "gpt", "value": "The answer is No, the midline is not shifted" } ], "clip_score": 30.46875, "loss": 1.9145500659942627 }, { "id": "identity_3930", "image": "DVQA/images/bar_train_00147253.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the largest individual bar in the whole chart?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 27.3125, "loss": 2.557166337966919 }, { "id": "identity_38967", "image": "DVQA/images/bar_train_00174916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the category with the highest value for 'back'?" }, { "from": "gpt", "value": "The answer is street" } ], "clip_score": 28.796875, "loss": 3.6936888694763184 }, { "id": "identity_224983", "image": "CLEVR-Math/images/CLEVR_train_021069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all cyan metal cylinders, yellow spheres, and red cubes. How many objects are left?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.234375, "loss": 2.2424497604370117 }, { "id": "identity_81577", "image": "GeoQA+/images/2724.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point C is 30 degrees north of west from point B, what is the bearing of line BC?" }, { "from": "gpt", "value": "The answer is 330 degrees" } ], "clip_score": 30.875, "loss": 1.9318199157714844 }, { "id": "identity_331622", "image": "DVQA/images/bar_train_00139737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is system" } ], "clip_score": 19.765625, "loss": 3.902695894241333 }, { "id": "identity_266528", "image": "VQA2.0/images/339670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this area a lobby?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.75, "loss": 3.6035263538360596 }, { "id": "identity_268984", "image": "TabMWP/images/12686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children weighed 60 pounds or more when Dr. Alvarado, a pediatrician, measured the weights of all the children who visited her office recently?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 30.171875, "loss": 2.1390979290008545 }, { "id": "identity_181286", "image": "IconQA/images/1845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are both green and triangles?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.203125, "loss": 2.366658926010132 }, { "id": "identity_182321", "image": "A-OKVQA/images/27190.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of apples are displayed in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.140625, "loss": 2.334792137145996 }, { "id": "identity_236603", "image": "PMC-VQA/images/PMC7721805_Fig5_316463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure pointed by the arrows and explain its function in the cell" }, { "from": "gpt", "value": "The answer is Cell membrane, controls substance movement and cell signaling" } ], "clip_score": 24.15625, "loss": 3.7122411727905273 }, { "id": "identity_100806", "image": "TabMWP/images/243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much does it cost to ride on the Choo-Choo Train and the Dizzy Spinner combined?" }, { "from": "gpt", "value": "The answer is $6.10" } ], "clip_score": 30.796875, "loss": 1.9659394025802612 }, { "id": "identity_245747", "image": "DocVQA/images/qhfk0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contact phone number and the topic Nancy Zucker-Brod covers for the Associated Press?" }, { "from": "gpt", "value": "The answer is (202) 776-9828 and tobacco issues" } ], "clip_score": 24.40625, "loss": 2.2660305500030518 }, { "id": "identity_332310", "image": "GeoQA+/images/3116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在圆中,OD垂直于BC,∠BOD=60°,则∠CAD的度数是多少?\nChoices:\n(A) 15°\n(B) 20°\n(C) 25°\n(D) 30°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.546875, "loss": 2.476769208908081 }, { "id": "identity_121432", "image": "PMC-VQA/images/PMC5298245_pone.0170597.g002_124739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What developmental stage is represented by the image labeled \"E12\"?" }, { "from": "gpt", "value": "The answer is 12th day of embryonic development" } ], "clip_score": 27.140625, "loss": 2.1867902278900146 }, { "id": "identity_296237", "image": "GeoQA+/images/1076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, vertices A, B, and C of triangle ABC lie on circle O, with angle OAC measuring 40° and angle OBC measuring 15°. What is the degree measure of angle AOB?\nChoices:\n(A) 55°\n(B) 110°\n(C) 120°\n(D) 150°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.21875, "loss": 2.406977891921997 }, { "id": "identity_183298", "image": "VQA2.0/images/404236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are flying in the sky?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.5, "loss": 2.095649242401123 }, { "id": "identity_249064", "image": "VQA-AS/images/2220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are on the floor, and what are they?" }, { "from": "gpt", "value": "The answer is Two objects: a rug and a dog" } ], "clip_score": 28.34375, "loss": 2.997532606124878 }, { "id": "identity_82305", "image": "UniGeo/images/3120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If DE = 18m, what is the length of triangle ADE's perimeter?" }, { "from": "gpt", "value": "The answer is 44.5m" } ], "clip_score": 25.859375, "loss": 1.9452552795410156 }, { "id": "identity_311871", "image": "Geometry3K/images/885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of h.\nChoices:\n(A) \\frac { 15 \\sqrt { 3 } } { 2 }\n(B) 15\n(C) 15 \\sqrt 2\n(D) 15 \\sqrt 3" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.03125, "loss": 2.705134391784668 }, { "id": "identity_198685", "image": "VQA2.0/images/30569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the aprons worn by the chefs?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.125, "loss": 3.3771984577178955 }, { "id": "identity_40186", "image": "FigureQA/images/9786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the Dark Gold section to the Gold section?" }, { "from": "gpt", "value": "The answer is A ratio based on estimated angles" } ], "clip_score": 30.046875, "loss": 5.6478705406188965 }, { "id": "identity_272447", "image": "Geometry3K/images/691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In trapezoid QRST, A and B are midpoints of the legs. What is the length of AB?\nChoices:\n(A) 12\n(B) 16\n(C) 20\n(D) 32" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.84375, "loss": 3.1122689247131348 }, { "id": "identity_161784", "image": "UniGeo/images/1821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of side BC of pentagon ABCDE is 9 units, what is the length of side GM of pentagon POGMN?" }, { "from": "gpt", "value": "The answer is 6 units" } ], "clip_score": 34.125, "loss": 1.8173714876174927 }, { "id": "identity_218878", "image": "Super-CLEVR/images/superCLEVR_new_000671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of wheels visible in the image?" }, { "from": "gpt", "value": "The answer is 6 + (2*3) + 2 = 14" } ], "clip_score": 14.9140625, "loss": 1.9556716680526733 }, { "id": "identity_241420", "image": "PMC-VQA/images/PMC8034399_F4_412878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the radiographic appearance of the lesion shown in the CT images, what are the possible differential diagnoses, and which one is most likely given the well-circumscribed nature of the lesion?" }, { "from": "gpt", "value": "The answer is Dentigerous cyst" } ], "clip_score": 31.796875, "loss": 3.0679619312286377 }, { "id": "identity_113237", "image": "A-OKVQA/images/431494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is the blue item positioned within the vehicle?" }, { "from": "gpt", "value": "The answer is Diagonally" } ], "clip_score": 19.046875, "loss": 3.84106707572937 }, { "id": "identity_25522", "image": "AI2D/images/613.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Below is a diagram of the life cycle of an insect. Which part of the diagram represents the pupa?\nChoices:\n(A) C\n(B) D\n(C) B\n(D) A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.78125, "loss": 3.758673906326294 }, { "id": "identity_303376", "image": "DVQA/images/bar_train_00158381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy for a dataset?" }, { "from": "gpt", "value": "The answer is one" } ], "clip_score": 34.09375, "loss": 3.5959877967834473 }, { "id": "identity_85827", "image": "Geometry3K/images/74.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the square?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 34.09375, "loss": 1.855590581893921 }, { "id": "identity_26339", "image": "UniGeo/images/1314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,过⊙O外一点P引⊙O的两条切线PA、PB,切点分别是A、B,OP交⊙O于点C,点D是优弧AB上不与点A、点B重合的一个动点,连接AD、CD,若∠APB=76°,则△ADC的度数为()\nChoices:\n(A) 26°\n(B) 20°\n(C) 16°\n(D) 30°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.359375, "loss": 2.395362377166748 }, { "id": "identity_46656", "image": "DocVQA/images/rrcw0217_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long is the editorial review time for Fertility and Sterility?" }, { "from": "gpt", "value": "The answer is 4 months" } ], "clip_score": 19.03125, "loss": 2.016817331314087 }, { "id": "identity_205416", "image": "PMC-VQA/images/PMC7475899_materials-13-03463-f006_249676.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What elements are indicated as being present in the composite specimen?" }, { "from": "gpt", "value": "The answer is Tin (Sn) and Lead (Pb)" } ], "clip_score": 20.15625, "loss": 1.4257420301437378 }, { "id": "identity_280220", "image": "TQA/images/question_images/human_system_digestive_6082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the anatomical structure located between the Anus and the Large Intestine?\nChoices:\n(A) Small Intestine\n(B) Rectum\n(C) Stomach\n(D) Lleum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.3125, "loss": 4.126775741577148 }, { "id": "identity_70156", "image": "ScienceQA/images/8762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of year does the image likely depict?" }, { "from": "gpt", "value": "The answer is Spring or Fall" } ], "clip_score": 13.8671875, "loss": 3.4641807079315186 }, { "id": "identity_219164", "image": "VQA2.0/images/18723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any trees with purple leaves in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.53125, "loss": 3.399428606033325 }, { "id": "identity_274627", "image": "TextVQA/images/c3bbbfde4e516c27.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of individuals that are referenced?" }, { "from": "gpt", "value": "The answer is 5000" } ], "clip_score": 14.28125, "loss": 1.5661098957061768 }, { "id": "identity_105694", "image": "TextVQA/images/6647701c2c99fb1a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the author of the books?" }, { "from": "gpt", "value": "The answer is Sylvia Plath" } ], "clip_score": 36.25, "loss": 1.9530282020568848 }, { "id": "identity_208675", "image": "PMC-VQA/images/PMC2857861_F7_62588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the red coloration in the image likely represent?" }, { "from": "gpt", "value": "The answer is Surrounding tissues/blood flow" } ], "clip_score": 27.40625, "loss": 3.227217674255371 }, { "id": "identity_104260", "image": "VQA-AS/images/18492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children are playing in the playground?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.234375, "loss": 2.395916223526001 }, { "id": "identity_329882", "image": "TextVQA/images/03e77cd31f9d6306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person wearing green in the front?" }, { "from": "gpt", "value": "The answer is benedita" } ], "clip_score": 12.984375, "loss": 5.817046642303467 }, { "id": "identity_233310", "image": "AI2D/images/3138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the layers of cells from the cuticle to the spongy mesophyll in order?" }, { "from": "gpt", "value": "The answer is Epidermis, Palisade Mesophyll, Vascular Tissue, Spongy Mesophyll" } ], "clip_score": 28.859375, "loss": 0.7081848978996277 }, { "id": "identity_292748", "image": "PMC-VQA/images/PMC5946363_F1_313143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the CT scan results among the following options: (A) Decreased swelling, (B) Normal tissue, (C) Marked swelling, or (D) Calcification?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.8125, "loss": 4.021430969238281 }, { "id": "identity_167153", "image": "IconQA/images/27684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern continues, what will be the seventh image?" }, { "from": "gpt", "value": "The answer is Turtle" } ], "clip_score": 26.453125, "loss": 2.3461647033691406 }, { "id": "identity_133311", "image": "Super-CLEVR/images/superCLEVR_new_001598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are there in total in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.84375, "loss": 1.938209056854248 }, { "id": "identity_105130", "image": "PMC-VQA/images/PMC6333181_acm212498-fig-0001_422859.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of colored markers visible in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 24.34375, "loss": 2.216920852661133 }, { "id": "identity_297711", "image": "VizWiz/images/VizWiz_train_00013440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.25, "loss": 4.420032024383545 }, { "id": "identity_930", "image": "VQA-RAD/images/synpic51426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI is this?" }, { "from": "gpt", "value": "The answer is T2" } ], "clip_score": 29.359375, "loss": 3.033719301223755 }, { "id": "identity_164302", "image": "UniGeo/images/1479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If DE is parallel to BC and the length of DE is 1, what is the length of BC?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.21875, "loss": 2.6004791259765625 }, { "id": "identity_28699", "image": "A-OKVQA/images/25996.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What motion makes the child smile?\nChoices:\n(A) nodding\n(B) earthquake\n(C) rocking\n(D) sliding" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.2578125, "loss": 4.128521919250488 }, { "id": "identity_21871", "image": "A-OKVQA/images/554502.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the lampshade made of?\nChoices:\n(A) metal\n(B) stained glass\n(C) ceramic\n(D) fabric" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.984375, "loss": 4.577710151672363 }, { "id": "identity_88715", "image": "GeoQA+/images/2919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a tangent is drawn from point M to circle O, what is the length of the tangent segment?" }, { "from": "gpt", "value": "The answer is the length of the tangent segment is 4.8 cm" } ], "clip_score": 28.84375, "loss": 2.024204969406128 }, { "id": "identity_228805", "image": "GeoQA+/images/3635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,把△ABC沿EF对折,叠合后的图形如图所示.若∠A=60°,∠1=85°,求∠B'EF的度数。" }, { "from": "gpt", "value": "The answer is 125°" } ], "clip_score": 25.75, "loss": 1.8391375541687012 }, { "id": "identity_192863", "image": "VizWiz/images/VizWiz_train_00007419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many caplets are contained in this medication bottle?" }, { "from": "gpt", "value": "The answer is 24 caplets" } ], "clip_score": 31.203125, "loss": 1.8156100511550903 }, { "id": "identity_131122", "image": "VQA2.0/images/17078.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any trees without leaves in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.921875, "loss": 3.2813572883605957 }, { "id": "identity_118630", "image": "VQA2.0/images/567234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a window air conditioning unit present in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is a window air conditioning unit" } ], "clip_score": 19.859375, "loss": 1.883525013923645 }, { "id": "identity_78718", "image": "AI2D/images/4804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the figure that has the least number of leaves" }, { "from": "gpt", "value": "The answer is Figure A" } ], "clip_score": 25.4375, "loss": 2.438575267791748 }, { "id": "identity_13662", "image": "TextVQA/images/d68e5a8505f697cb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the player wearing number 12?" }, { "from": "gpt", "value": "The answer is diego" } ], "clip_score": 23.953125, "loss": 3.4055469036102295 }, { "id": "identity_283732", "image": "PMC-VQA/images/PMC6691837_f3-cln_74p1_22906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to evaluate the effectiveness of foam sclerotherapy in the patient?\nChoices:\n(A) MRI\n(B) CT\n(C) X-ray\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.625, "loss": 3.633462905883789 }, { "id": "identity_273762", "image": "PMC-VQA/images/PMC4604447_fig3_433019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During the behavior observation of control mice, which part of their body is being suspended?\nChoices:\n(A) tail\n(B) head\n(C) hindlimbs\n(D) forelimbs" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.487858772277832 }, { "id": "identity_333453", "image": "PMC-VQA/images/PMC3900399_pone-0082707-g005_260972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the T-bar in this represent?\nChoices:\n(A) Blood Flow\n(B) Electrical Activity\n(C) Volume Difference\n(D) Cellular Activity" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.484375, "loss": 4.9452314376831055 }, { "id": "identity_196146", "image": "A-OKVQA/images/537139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any graffiti visible on the train cars?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.15625, "loss": 2.8783271312713623 }, { "id": "identity_89982", "image": "Geometry3K/images/920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 6?" }, { "from": "gpt", "value": "The answer is 49" } ], "clip_score": 29.71875, "loss": 2.834329843521118 }, { "id": "identity_47642", "image": "FigureQA/images/54487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the color Yellow?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 28.109375, "loss": 2.911518096923828 }, { "id": "identity_113559", "image": "A-OKVQA/images/17350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs are visible in the image?" }, { "from": "gpt", "value": "The answer is There are two dogs" } ], "clip_score": 21.375, "loss": 3.5459017753601074 }, { "id": "identity_122713", "image": "Super-CLEVR/images/superCLEVR_new_001283.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that is the same color as the car?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.765625, "loss": 3.2175698280334473 }, { "id": "identity_180525", "image": "TabMWP/images/3555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did any country double its number of Nobel Prize winners from the 1970s to the 1980s?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 39.96875, "loss": 2.8787739276885986 }, { "id": "identity_138613", "image": "PlotQA/images/126338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average population of South Africa in urban agglomerations of more than 1 million over the given years?" }, { "from": "gpt", "value": "The answer is 7500000" } ], "clip_score": 24.9375, "loss": 1.7899905443191528 }, { "id": "identity_61864", "image": "DocVQA/images/zngl0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the Executive Director at Sugar Research Foundation, Inc.?" }, { "from": "gpt", "value": "The answer is Ody H. Lamborn" } ], "clip_score": 16.953125, "loss": 3.7263264656066895 }, { "id": "identity_232086", "image": "VQA-RAD/images/synpic53097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristics of the mass that indicate it is not calcified, and what other diagnostic features can be observed in the image?" }, { "from": "gpt", "value": "The answer is The mass is not calcified; no significant additional abnormalities observed" } ], "clip_score": 26.234375, "loss": 3.520277976989746 }, { "id": "identity_302918", "image": "GeoQA+/images/5651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在圆O中,AB是圆O的直径,AB=√{2}AC,∠ODC=12°,则∠DBC的度数为()\nChoices:\n(A) 80°\n(B) 102°\n(C) 120°\n(D) 135°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.625, "loss": 2.3827037811279297 }, { "id": "identity_308554", "image": "ScienceQA/images/17264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a shared property of these four items?\nChoices:\n(A) hard\n(B) opaque\n(C) translucent" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.203125, "loss": 4.609836578369141 }, { "id": "identity_270317", "image": "DVQA/images/bar_train_00124889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of item holders that were sold?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.65625, "loss": 2.495739221572876 }, { "id": "identity_224015", "image": "Super-CLEVR/images/superCLEVR_new_000986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image are the same color as the large thing that is made of the same material as the small wagon?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.265625, "loss": 2.517589807510376 }, { "id": "identity_35897", "image": "VQA2.0/images/36548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bike?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 26.71875, "loss": 3.3589324951171875 }, { "id": "identity_203394", "image": "A-OKVQA/images/395311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sheep are jumping over the purple bar?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.09375, "loss": 2.146087408065796 }, { "id": "identity_34066", "image": "Geometry3K/images/919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the value of x.\nChoices:\n(A) 1.02\n(B) 2.04\n(C) 4.08\n(D) 8.17" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.59375, "loss": 2.7127602100372314 }, { "id": "identity_89078", "image": "Geometry3K/images/162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BCD in the circle?" }, { "from": "gpt", "value": "The answer is 117.5 degrees" } ], "clip_score": 36.0625, "loss": 1.9179069995880127 }, { "id": "identity_210694", "image": "PMC-VQA/images/PMC5599687_Fig6_215749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label CHP refer to in the image?" }, { "from": "gpt", "value": "The answer is Choroid plexus" } ], "clip_score": 30.796875, "loss": 2.355461835861206 }, { "id": "identity_326538", "image": "GeoQA+/images/2391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在矩形ABCD中,∠ADC的平分线交BC于点E,将一块三角板的直角顶点放在点E处,一条直角边经过点A,另一条直角边交CD于点M,若DM=2CM=4,则BC的长度是()\n选项:\n(A) 8\n(B) 7\n(C) 5\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.8125, "loss": 2.2382924556732178 }, { "id": "identity_320669", "image": "PMC-VQA/images/PMC7460125_diagnostics-10-00534-f002_243447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the top row of images?\nChoices:\n(A) Microscopic muscle fiber cross-sectional area (CSA) maps \n(B) Microscopic fat infiltration (PDFF, no units) maps \n(C) Muscle T2 maps \n(D) Muscle glycogen maps" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.828125, "loss": 3.9228782653808594 }, { "id": "identity_54452", "image": "DocVQA/images/ppxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the specific request made in the note?" }, { "from": "gpt", "value": "The answer is Use updated list of 2-74" } ], "clip_score": 21.828125, "loss": 2.1406562328338623 }, { "id": "identity_92782", "image": "TabMWP/images/35170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more students went down the slide 3 times than 4 times?" }, { "from": "gpt", "value": "The answer is 19-9=10" } ], "clip_score": 28.9375, "loss": 1.796519160270691 }, { "id": "identity_72734", "image": "ScienceQA/images/4000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the magnetic poles shown, what type of interaction will occur between the two magnets?" }, { "from": "gpt", "value": "The answer is Attraction" } ], "clip_score": 20.4375, "loss": 3.011946439743042 }, { "id": "identity_167566", "image": "IconQA/images/14698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of fins per fish in the image?" }, { "from": "gpt", "value": "The answer is 1 fin per fish" } ], "clip_score": 30.0, "loss": 1.880708932876587 }, { "id": "identity_121066", "image": "TextVQA/images/98005b9c129ca906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the activity happening on the beach in the background?" }, { "from": "gpt", "value": "The answer is Sunbathing" } ], "clip_score": 18.8125, "loss": 2.720255136489868 }, { "id": "identity_224167", "image": "TabMWP/images/10905.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average size of all the committees?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 26.171875, "loss": 2.7551162242889404 }, { "id": "identity_337011", "image": "A-OKVQA/images/510949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the status of this truck?\nChoices:\n(A) parked\n(B) going\n(C) waiting\n(D) broken down" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.5, "loss": 4.461268424987793 }, { "id": "identity_117796", "image": "A-OKVQA/images/539719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the laptop?" }, { "from": "gpt", "value": "The answer is Red and Black" } ], "clip_score": 19.984375, "loss": 2.783540964126587 }, { "id": "identity_173563", "image": "IconQA/images/84824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides for all triangles combined?" }, { "from": "gpt", "value": "The answer is 27" } ], "clip_score": 28.828125, "loss": 2.3325252532958984 }, { "id": "identity_276607", "image": "GeoQA+/images/3732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If a set square is placed in the position shown in the figure, with ∠C = ∠EDF = 90°, ∠E = 45°, ∠B = 60°, point D on side BC, sides DE and AB intersect at point G. If EF is parallel to AB, what is the degree measure of ∠CDE? \nChoices:\n(A) 105°\n(B) 100°\n(C) 95°\n(D) 75°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.6875, "loss": 2.4441535472869873 }, { "id": "identity_329947", "image": "IconQA/images/106608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this symbol?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.90625, "loss": 3.8723788261413574 }, { "id": "identity_118908", "image": "TextVQA/images/d01b5b23273957fe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the fence?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 20.1875, "loss": 2.874039649963379 }, { "id": "identity_249926", "image": "ScienceQA/images/9791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the smallest U.S. state by area and provide its population as of the latest census" }, { "from": "gpt", "value": "The answer is Providence, population 190,934 (as of the 2020 census)" } ], "clip_score": 29.109375, "loss": 1.522995948791504 }, { "id": "identity_190825", "image": "PMC-VQA/images/PMC8745742_ijms-23-00558-f001_160201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accelerating voltage used in capturing this image?" }, { "from": "gpt", "value": "The answer is 10 kilovolts" } ], "clip_score": 19.65625, "loss": 1.4422430992126465 }, { "id": "identity_150217", "image": "DocVQA/images/kzbf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the Ad Hoc Committee?" }, { "from": "gpt", "value": "The answer is Develop a plan for Professional Direction of the Barnes Hospital Emergency Room" } ], "clip_score": 20.125, "loss": 2.8565585613250732 }, { "id": "identity_212160", "image": "VQA2.0/images/136043.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest it is?" }, { "from": "gpt", "value": "The answer is Morning or late afternoon" } ], "clip_score": 13.8515625, "loss": 3.192737579345703 }, { "id": "identity_221399", "image": "Super-CLEVR/images/superCLEVR_new_001761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.0625, "loss": 3.8960444927215576 }, { "id": "identity_34111", "image": "A-OKVQA/images/514191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What season is this?\nChoices:\n(A) summer\n(B) autumn\n(C) winter\n(D) spring" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.359375, "loss": 4.20917272567749 }, { "id": "identity_132057", "image": "VQA-RAD/images/synpic25821.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the orientation of the organs in the image normal?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.484375, "loss": 3.1643142700195312 }, { "id": "identity_120517", "image": "TextVQA/images/9fe732e820cda038.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of appliances is predominantly advertised in the store?" }, { "from": "gpt", "value": "The answer is LG" } ], "clip_score": 22.34375, "loss": 2.177461624145508 }, { "id": "identity_276535", "image": "GeoQA+/images/1161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, quadrilateral ABCD is a parallelogram. Line DE is perpendicular to BC at point E, and line BF is perpendicular to CD at point F. Lines DE and BF intersect at point H. If angle A is 60 degrees, what is the measure of angle EHF? \nChoices:\n(A) 100°\n(B) 110°\n(C) 120°\n(D) 150°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.5, "loss": 2.282503604888916 }, { "id": "identity_134044", "image": "FigureQA/images/9422.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the Light Gold bar?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 20.96875, "loss": 2.9402663707733154 }, { "id": "identity_186737", "image": "PMC-VQA/images/PMC4853950_fig1_27037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is likely being imaged in this angiogram?" }, { "from": "gpt", "value": "The answer is Brain" } ], "clip_score": 31.96875, "loss": 2.8060429096221924 }, { "id": "identity_218597", "image": "A-OKVQA/images/483316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches can be seen on the platform?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.578125, "loss": 2.2965590953826904 }, { "id": "identity_337311", "image": "ScienceQA/images/7901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Manchester\n(B) Albany\n(C) Tulsa\n(D) Concord" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.59375, "loss": 4.998361587524414 }, { "id": "identity_113657", "image": "TextVQA/images/d1037638d96a3764.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing helmets in the image?" }, { "from": "gpt", "value": "The answer is At least four" } ], "clip_score": 17.015625, "loss": 4.487674713134766 }, { "id": "identity_319929", "image": "DVQA/images/bar_train_00046885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there stacked bars in the chart?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 31.4375, "loss": 3.6465141773223877 }, { "id": "identity_298618", "image": "ScienceQA/images/19773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the bubble map, which sport has been included in both the Summer and Winter Olympics out of the following options:\n(A) ice hockey\n(B) biathlon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.1875, "loss": 4.122919082641602 }, { "id": "identity_202883", "image": "A-OKVQA/images/262985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the boats?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 21.5625, "loss": 3.3207778930664062 }, { "id": "identity_201579", "image": "VQA-AS/images/1032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the girl doing in front of the fireplace?" }, { "from": "gpt", "value": "The answer is Sitting and eating popcorn" } ], "clip_score": 31.15625, "loss": 2.6819632053375244 }, { "id": "identity_184152", "image": "A-OKVQA/images/178666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Rainy" } ], "clip_score": 23.4375, "loss": 2.372328758239746 }, { "id": "identity_272658", "image": "AI2D/images/3241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter in the diagram is the nucleus labeled as?\nChoices:\n(A) J\n(B) A\n(C) K\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.078125, "loss": 3.7082321643829346 }, { "id": "identity_267097", "image": "GeoQA+/images/138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB is the diameter of circle O, CB is tangent to circle O at point B, AC intersects circle O at point D, and OD is connected. If ∠C = 58°, what is the degree measure of ∠BOD?\nChoices:\n(A) 32°\n(B) 42°\n(C) 64°\n(D) 84°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.3125, "loss": 2.295318603515625 }, { "id": "identity_267618", "image": "AI2D/images/715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which cycle is depicted in this illustration? Is it:\n(A) Rock Cycle\n(B) Magma\n(C) Metorites\n(D) Sediment" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.46875, "loss": 3.067805051803589 }, { "id": "identity_15949", "image": "A-OKVQA/images/442223.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of seating would one expect to find in this building?\nChoices:\n(A) futons\n(B) booths\n(C) pews\n(D) stools" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.359375, "loss": 4.471151828765869 }, { "id": "identity_123137", "image": "VQA2.0/images/103287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any flowers placed directly on the ground?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.546875, "loss": 3.225822925567627 }, { "id": "identity_62988", "image": "ScienceQA/images/11933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what can be inferred about the viscosity of the substance?" }, { "from": "gpt", "value": "The answer is High viscosity" } ], "clip_score": 29.625, "loss": 3.073155164718628 }, { "id": "identity_279733", "image": "PMC-VQA/images/PMC6868808_Fig1_72142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which types of MRI scans displayed alterations in nuclear volume and signal intensity?\nChoices:\n(A) T1-weighted sagittal MRI scans\n(B) T2-weighted sagittal MRI scans\n(C) T1-weighted coronal MRI scans\n(D) T2-weighted coronal MRI scans" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.265625, "loss": 3.497056245803833 }, { "id": "identity_121905", "image": "VQA-AS/images/8734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the pants the person is wearing?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.21875, "loss": 3.6039538383483887 }, { "id": "identity_155692", "image": "ScienceQA/images/14871.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the continent located at the top center of the image" }, { "from": "gpt", "value": "The answer is Europe" } ], "clip_score": 29.359375, "loss": 2.9889471530914307 }, { "id": "identity_164756", "image": "UniGeo/images/2804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If O is the circumcenter of triangle ABC and angle BOC is 40 degrees, what is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 34.84375, "loss": 1.7936468124389648 }, { "id": "identity_30956", "image": "TabMWP/images/21969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Emma wrote down how many minutes of TV she watched over the past 4 days. On which day did Emma watch the most TV?\nChoices:\n(A) Monday\n(B) Tuesday\n(C) Wednesday\n(D) Thursday" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.125, "loss": 3.8864924907684326 }, { "id": "identity_193213", "image": "A-OKVQA/images/205243.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats have sails that are not white?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 21.140625, "loss": 2.320343255996704 }, { "id": "identity_20856", "image": "VQA-AS/images/92.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man holding?" }, { "from": "gpt", "value": "The answer is book" } ], "clip_score": 24.875, "loss": 3.9528815746307373 }, { "id": "identity_26258", "image": "MapQA/images/map_81.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Minnesota ?" }, { "from": "gpt", "value": "The answer is 3941066-4028324" } ], "clip_score": 24.46875, "loss": 2.654463768005371 }, { "id": "identity_202568", "image": "PMC-VQA/images/PMC6408898_RSPB20182733F2_447708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the green color in the legend represent?" }, { "from": "gpt", "value": "The answer is Lower quadrant of unaffected hemifield with p > 0.1" } ], "clip_score": 17.6875, "loss": 2.64557147026062 }, { "id": "identity_316974", "image": "Geometry3K/images/103.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle JKL.\nChoices:\n(A) 21\n(B) 31\n(C) 43\n(D) 84" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.65625, "loss": 2.8771839141845703 }, { "id": "identity_56918", "image": "DVQA/images/bar_train_00011611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the preference for 'hall' greater than 50% according to the chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.359375, "loss": 3.2154364585876465 }, { "id": "identity_140581", "image": "DocVQA/images/zznp0227_111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Director of the Agricultural Division?" }, { "from": "gpt", "value": "The answer is Gary D. Simmons" } ], "clip_score": 26.03125, "loss": 2.384262800216675 }, { "id": "identity_28695", "image": "PMC-VQA/images/PMC6968877_pone.0227906.g001_102527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain is affected in the patient mentioned in the caption?\nChoices:\n(A) Left hippocampus \n(B) Right amygdala \n(C) Left amygdala \n(D) Right hippocampus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5625, "loss": 3.789597749710083 }, { "id": "identity_304061", "image": "Super-CLEVR/images/superCLEVR_new_000462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the large bus to the right of the rubber bus in front of the big rubber bike?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 30.546875, "loss": 4.498895168304443 }, { "id": "identity_200603", "image": "PMC-VQA/images/PMC6269333_Fig11_404761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any soft tissue abnormalities be observed in the wrist area in this image?" }, { "from": "gpt", "value": "The answer is No soft tissue abnormalities observed" } ], "clip_score": 33.71875, "loss": 2.5102787017822266 }, { "id": "identity_162342", "image": "GeoQA+/images/1376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若∠A=35°,则∠CDB的度数为多少?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 31.59375, "loss": 1.88675856590271 }, { "id": "identity_31351", "image": "Super-CLEVR/images/superCLEVR_new_000865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The purple matte car has what size?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 24.8125, "loss": 4.161266803741455 }, { "id": "identity_191194", "image": "Super-CLEVR/images/superCLEVR_new_001076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the airplane's tail?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 13.9453125, "loss": 3.6761276721954346 }, { "id": "identity_47570", "image": "FigureQA/images/87074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis label does the Peru line reach its minimum value?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 25.34375, "loss": 2.1940550804138184 }, { "id": "identity_278985", "image": "AI2D/images/4784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is an alternative name for a snapdragon?\n(A) none\n(B) antorphinum majus\n(C) antirrhinum majus\n(D) tinhimum majus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.484375, "loss": 3.5331242084503174 }, { "id": "identity_71189", "image": "TQA/images/question_images/human_system_digestive_1398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the organ labeled A in the digestive process?" }, { "from": "gpt", "value": "The answer is Transporting food to the stomach" } ], "clip_score": 35.5, "loss": 2.828871965408325 }, { "id": "identity_49259", "image": "FigureQA/images/40145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of Steel Blue in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 16.67%" } ], "clip_score": 38.1875, "loss": 1.7111454010009766 }, { "id": "identity_46948", "image": "DVQA/images/bar_train_00166803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many groups have a \"plain\" value greater than 5?" }, { "from": "gpt", "value": "The answer is 0 groups" } ], "clip_score": 19.21875, "loss": 3.5836856365203857 }, { "id": "identity_114133", "image": "TextVQA/images/431967a54cb13eea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What establishment is located to the right of the billboard?" }, { "from": "gpt", "value": "The answer is Ripley's Believe It or Not!" } ], "clip_score": 19.796875, "loss": 1.995478868484497 }, { "id": "identity_136432", "image": "PlotQA/images/150159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of days required to build a warehouse between the years 2002 and 2012?" }, { "from": "gpt", "value": "The answer is 0 days" } ], "clip_score": 25.484375, "loss": 2.4787306785583496 }, { "id": "identity_178279", "image": "IconQA/images/76782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of grid squares covered by the letter 'R' and the letter 'A'?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.953125, "loss": 2.5197789669036865 }, { "id": "identity_254130", "image": "A-OKVQA/images/176736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific activity are the people on the blue boards practicing, and what are the key indicators in the image that support this conclusion?" }, { "from": "gpt", "value": "The answer is Practicing surfing, indicated by surfboards, wetsuits, and instructor" } ], "clip_score": 30.484375, "loss": 2.49229097366333 }, { "id": "identity_132802", "image": "TextVQA/images/5803b2404bf9470b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other traffic signal visible besides the red light?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 31.234375, "loss": 2.9948668479919434 }, { "id": "identity_65407", "image": "ScienceQA/images/13025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the individuals in the image wearing identification badges?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.109375, "loss": 2.9998228549957275 }, { "id": "identity_297896", "image": "A-OKVQA/images/160459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What filling is present in the pastry depicted in the image?\nChoices:\n(A) jelly\n(B) cream\n(C) nothing\n(D) air" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.6796875, "loss": 4.225931167602539 }, { "id": "identity_48352", "image": "PlotQA/images/43644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which gender has a higher enrollment rate in Niger in 1973?" }, { "from": "gpt", "value": "The answer is Female" } ], "clip_score": 27.875, "loss": 2.714609146118164 }, { "id": "identity_109726", "image": "VizWiz/images/VizWiz_train_00011584.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fluid ounces are in each bottle of Dr Pepper?" }, { "from": "gpt", "value": "The answer is 24 fluid ounces" } ], "clip_score": 36.125, "loss": 1.3346302509307861 }, { "id": "identity_143815", "image": "DocVQA/images/jybx0223_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage increase in the domestic cigarette sales value compared to the previous year?" }, { "from": "gpt", "value": "The answer is 7.7%" } ], "clip_score": 16.484375, "loss": 1.90165376663208 }, { "id": "identity_123814", "image": "PMC-VQA/images/PMC7271094_Fig1_184019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a label indicating the direction of the view in the ultrasound image?" }, { "from": "gpt", "value": "The answer is Yes, 'V' indicating ventral" } ], "clip_score": 32.125, "loss": 3.5303475856781006 }, { "id": "identity_166961", "image": "TabMWP/images/37746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more lamps are there in 5 offices compared to 3 offices?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 37.25, "loss": 2.1052989959716797 }, { "id": "identity_17491", "image": "PMC-VQA/images/PMC3539948_F2_177557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the image show a reduction in? \nChoices:\n(A) Small myelinated fibers, \n(B) Large non-myelinated fibers, \n(C) Large myelinated fibers, \n(D) Small non-myelinated fibers. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.515625, "loss": 4.424139976501465 }, { "id": "identity_153384", "image": "AI2D/images/2599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the position of the Moon relative to the Earth and Sun affect the tides shown?" }, { "from": "gpt", "value": "The answer is It creates stronger spring tides" } ], "clip_score": 38.5625, "loss": 4.353908538818359 }, { "id": "identity_112152", "image": "PMC-VQA/images/PMC3732620_fig2_222368.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiopacity seen in the lower right quadrant of the X-ray image?" }, { "from": "gpt", "value": "The answer is It is the pelvic bone" } ], "clip_score": 32.40625, "loss": 2.8385612964630127 }, { "id": "identity_157775", "image": "TQA/images/abc_question_images/continental_drift_19083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which section is likely to become the Indian subcontinent?" }, { "from": "gpt", "value": "The answer is Section H" } ], "clip_score": 27.90625, "loss": 3.7693252563476562 }, { "id": "identity_258432", "image": "TextVQA/images/44bb81deb5cd4a9d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the imagery in the postcard reflect El Paso's cultural and geographical diversity, and what might this suggest about the city's attitude towards newcomers?" }, { "from": "gpt", "value": "The answer is El Paso's imagery suggests a welcoming and diverse attitude" } ], "clip_score": 27.546875, "loss": 2.7364161014556885 }, { "id": "identity_33689", "image": "GeoQA+/images/3475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,正方形A、B、C的边长分别为直角三角形的三边长,若正方形A、B的边长分别为3和5,则正方形C的面积为()\nChoices:\n(A) 16\n(B) 12\n(C) 15\n(D) 18" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.296875, "loss": 2.4759647846221924 }, { "id": "identity_15695", "image": "Super-CLEVR/images/superCLEVR_new_000666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the thing that is both in front of the blue matte bus and to the left of the matte scooter?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 31.9375, "loss": 3.778657913208008 }, { "id": "identity_191374", "image": "PMC-VQA/images/PMC4573681_Fig4_424242.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is the implant attached to?" }, { "from": "gpt", "value": "The answer is A vertebra" } ], "clip_score": 26.4375, "loss": 3.7054972648620605 }, { "id": "identity_84451", "image": "UniGeo/images/1704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that angle 2 is 115°, what is the measure of the acute angle formed between the shorter side of the rectangle and the base of the triangle ruler?" }, { "from": "gpt", "value": "The answer is 115°" } ], "clip_score": 31.9375, "loss": 1.3324761390686035 }, { "id": "identity_247009", "image": "A-OKVQA/images/363191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the items inside and around the refrigerators, what type of beverages are predominantly stored in these refrigerators, and what does this indicate about the primary function of the establishment?" }, { "from": "gpt", "value": "The answer is Alcoholic beverages, indicating the primary function is a bar" } ], "clip_score": 31.15625, "loss": 2.182368516921997 }, { "id": "identity_260379", "image": "Geometry3K/images/347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of PQ" }, { "from": "gpt", "value": "The answer is 10 + 10 + 6√3" } ], "clip_score": 29.09375, "loss": 2.503446102142334 }, { "id": "identity_302346", "image": "GeoQA+/images/3910.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABDE是⊙O的内接四边形,CE是⊙O的直径,连接BC,DC.若∠BDC=20°,则∠A的度数为多少?\nChoices:\n(A) 90°\n(B) 100°\n(C) 110°\n(D) 120°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.515625, "loss": 2.4141035079956055 }, { "id": "identity_89129", "image": "UniGeo/images/363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given quadrilateral ABCD, with AB parallel to CD, and AB=AC=AD=4, BC=2, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 29.421875, "loss": 1.682281732559204 }, { "id": "identity_190266", "image": "VQA2.0/images/445761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visibly holding a camera?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.859375, "loss": 4.675020694732666 }, { "id": "identity_102990", "image": "TabMWP/images/7731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different departure times are there from Floral Gardens to any destination?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 25.671875, "loss": 2.0455362796783447 }, { "id": "identity_280915", "image": "VQA-RAD/images/synpic23008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the presence of pneumoperitoneum noticeable?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.671875, "loss": 3.351433515548706 }, { "id": "identity_175809", "image": "IconQA/images/31805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the ruler's total length does the line cover?" }, { "from": "gpt", "value": "The answer is 1/2 or 0.5" } ], "clip_score": 31.140625, "loss": 1.3502576351165771 }, { "id": "identity_55476", "image": "MapQA/images/map_2031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range represented by the color legend?" }, { "from": "gpt", "value": "The answer is 33 to 37" } ], "clip_score": 18.1875, "loss": 2.1878929138183594 }, { "id": "identity_75106", "image": "TQA/images/question_images/periodic_table_7393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the atomic number of the element labeled as \"Xe\" in the Noble Gases group?" }, { "from": "gpt", "value": "The answer is 54" } ], "clip_score": 26.0, "loss": 1.5741405487060547 }, { "id": "identity_327114", "image": "PMC-VQA/images/PMC4304789_pone.0115861.g003_352618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the blue dash lines in the higher magnification of A-C?\nChoices:\n(A) Developed cushion area\n(B) Undeveloped cushion area\n(C) Epithelial area\n(D) Connective tissue area" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.40625, "loss": 5.131030082702637 }, { "id": "identity_107955", "image": "Super-CLEVR/images/superCLEVR_new_001929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the star-shaped object?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 8.8671875, "loss": 3.799969434738159 }, { "id": "identity_332228", "image": "FigureQA/images/81293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.125, "loss": 3.8852035999298096 }, { "id": "identity_85152", "image": "UniGeo/images/379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABD in the given diagram?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 32.9375, "loss": 1.6269526481628418 }, { "id": "identity_233523", "image": "AI2D/images/1883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organisms in the food web are directly or indirectly dependent on the Asian elephant for their survival?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.796875, "loss": 2.492486000061035 }, { "id": "identity_60544", "image": "ChartQA/images/multi_col_100761.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which age group has the lowest percentage of 'Don't know/no opinion' responses?" }, { "from": "gpt", "value": "The answer is 45-54" } ], "clip_score": 23.5625, "loss": 1.799276351928711 }, { "id": "identity_294075", "image": "CLEVR-Math/images/CLEVR_train_035540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the big blue matte cylinders, how many cylinders remain?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 29.921875, "loss": 2.1994102001190186 }, { "id": "identity_897", "image": "VQA2.0/images/139686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand headband does the man have?" }, { "from": "gpt", "value": "The answer is nike" } ], "clip_score": 23.921875, "loss": 3.445582151412964 }, { "id": "identity_222574", "image": "VizWiz/images/VizWiz_train_00001477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the birdcage's base?" }, { "from": "gpt", "value": "The answer is Grey or beige" } ], "clip_score": 29.421875, "loss": 4.668773174285889 }, { "id": "identity_144299", "image": "DocVQA/images/zkvm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum notice period required for the survey?" }, { "from": "gpt", "value": "The answer is At least 12 weeks advanced notice" } ], "clip_score": 21.609375, "loss": 2.6554181575775146 }, { "id": "identity_330241", "image": "A-OKVQA/images/329139.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these bananas for?\nChoices:\n(A) planting\n(B) selling\n(C) eating\n(D) disposal" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.28125, "loss": 4.303516387939453 }, { "id": "identity_8751", "image": "A-OKVQA/images/402228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of TV is that?\nChoices:\n(A) crt\n(B) projector\n(C) lcd\n(D) toy" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.59375, "loss": 4.441290378570557 }, { "id": "identity_1950", "image": "A-OKVQA/images/53720.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What role is the man on the red helmet?\nChoices:\n(A) pitcher\n(B) umpire\n(C) catcher\n(D) hitter" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.234375, "loss": 4.303311824798584 }, { "id": "identity_95791", "image": "TabMWP/images/1723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day were the least number of apples picked?" }, { "from": "gpt", "value": "The answer is Wednesday" } ], "clip_score": 36.71875, "loss": 2.2422244548797607 }, { "id": "identity_6556", "image": "AI2D/images/3248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the central part of the cell?\nChoices:\n(A) cytoplasm\n(B) nucleus\n(C) mitochondrion\n(D) golgi apparatus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.015625, "loss": 4.375692367553711 }, { "id": "identity_115079", "image": "PMC-VQA/images/PMC6322781_pone.0209674.g008_419942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to produce these images?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 29.25, "loss": 2.9533519744873047 }, { "id": "identity_72682", "image": "AI2D/images/1297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is being illustrated by the arrows in the image?" }, { "from": "gpt", "value": "The answer is Metamorphosis" } ], "clip_score": 25.6875, "loss": 2.626781463623047 }, { "id": "identity_145276", "image": "DocVQA/images/hmwn0226_37.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the potential benefit of decreasing wear mentioned in the document?" }, { "from": "gpt", "value": "The answer is Reduction of wear debris exposure to the patient" } ], "clip_score": 22.9375, "loss": 2.845123529434204 }, { "id": "identity_221034", "image": "VQA2.0/images/330775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image on the screen showing a live event?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.21875, "loss": 2.793351650238037 }, { "id": "identity_122269", "image": "VQA2.0/images/301781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cap on top of the fire hydrant?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 34.40625, "loss": 3.1478943824768066 }, { "id": "identity_136926", "image": "PlotQA/images/41610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did the time to prepare and pay taxes for the least developed countries decrease or increase from 2010 to 2015?" }, { "from": "gpt", "value": "The answer is Decrease" } ], "clip_score": 29.046875, "loss": 2.387746810913086 }, { "id": "identity_307707", "image": "TQA/images/question_images/biomes_6567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which biome is represented by this color?\nChoices:\n(A) Chaparral\n(B) Tropical Rainforest\n(C) Desert\n(D) Tundra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.546875, "loss": 4.604226112365723 }, { "id": "identity_209566", "image": "A-OKVQA/images/40341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity has likely taken place on this street recently?" }, { "from": "gpt", "value": "The answer is Snow shoveling" } ], "clip_score": 31.0625, "loss": 2.7084410190582275 }, { "id": "identity_91222", "image": "Geometry3K/images/1170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of segment WZ" }, { "from": "gpt", "value": "The answer is WZ is approximately 13.9" } ], "clip_score": 30.796875, "loss": 2.698758363723755 }, { "id": "identity_17620", "image": "GeoQA+/images/2945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,O是BC边上的点,以点O为圆心,BO为半径的⊙O与AC相切于点A,D是优弧AB上一点,∠ADB=60°,则∠C的度数是()\nChoices:\n(A) 65°\n(B) 50°\n(C) 40°\n(D) 30°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.625, "loss": 2.6658592224121094 }, { "id": "identity_29909", "image": "TextVQA/images/03e77cd31f9d6306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the man wearing green in the front?" }, { "from": "gpt", "value": "The answer is benedita" } ], "clip_score": 13.9921875, "loss": 5.639461517333984 }, { "id": "identity_47876", "image": "DVQA/images/bar_train_00014254.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest value for the category \"male\"?" }, { "from": "gpt", "value": "The answer is age" } ], "clip_score": 22.859375, "loss": 4.119160175323486 }, { "id": "identity_276907", "image": "Geometry3K/images/653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that triangle ABC is congruent to triangle FGH, what is the value of x?\nChoices:\n(A) 2\n(B) 3\n(C) 7\n(D) 13" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.0625, "loss": 2.5866599082946777 }, { "id": "identity_156517", "image": "ScienceQA/images/4431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cooking method used for the pasta in the image?" }, { "from": "gpt", "value": "The answer is Boiling" } ], "clip_score": 31.03125, "loss": 2.931842088699341 }, { "id": "identity_47883", "image": "FigureQA/images/70837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Cornflower line consistently increasing?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.15625, "loss": 3.273181438446045 }, { "id": "identity_47818", "image": "FigureQA/images/33997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of colors represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.0, "loss": 1.9563961029052734 }, { "id": "identity_294484", "image": "PMC-VQA/images/PMC5006384_Fig2_68539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized for assessing the mice? \nOptions:\n(A) CT scan\n(B) PET scan\n(C) MRI\n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.0, "loss": 4.171566963195801 }, { "id": "identity_88541", "image": "GeoQA+/images/2062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠DEB = 60°, what is the measure of ∠EAC?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 35.0, "loss": 1.3965339660644531 }, { "id": "identity_328105", "image": "PMC-VQA/images/PMC5617943_diagnostics-07-00043-f001_221034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the pattern of pulmonary uptake in this?\nChoices:\n(A) Focal \n(B) Diffuse \n(C) Peripheral \n(D) Central" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.03125, "loss": 4.40817403793335 }, { "id": "identity_318655", "image": "PMC-VQA/images/PMC4660938_F0003_448112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of lesions are visible in this scan? \nChoices:\n(A) Corkscrew lesions \n(B) Sclerotic lesions \n(C) Lytic lesions \n(D) Hematoma lesions" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.671875, "loss": 4.506516456604004 }, { "id": "identity_89005", "image": "Geometry3K/images/440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If another triangle is similar to this one with a scale factor of 2, what would be the length of the hypotenuse of the larger triangle?" }, { "from": "gpt", "value": "The answer is 30 feet" } ], "clip_score": 34.375, "loss": 2.2432053089141846 }, { "id": "identity_230974", "image": "CLEVR-Math/images/CLEVR_train_019057.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 more yellow shiny things and remove 1 yellow non-shiny thing, how many yellow things will exist in total?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.59375, "loss": 2.392552375793457 }, { "id": "identity_200911", "image": "TextVQA/images/46c9d074dd514b82.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the make of the white car?" }, { "from": "gpt", "value": "The answer is Vauxhall" } ], "clip_score": 27.765625, "loss": 2.1443374156951904 }, { "id": "identity_124418", "image": "TextVQA/images/0ac74c054a000287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the player's glove?" }, { "from": "gpt", "value": "The answer is Mizuno" } ], "clip_score": 22.28125, "loss": 2.807199716567993 }, { "id": "identity_2630", "image": "FigureQA/images/68978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Dark Salmon have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.984375, "loss": 3.8516178131103516 }, { "id": "identity_27123", "image": "UniGeo/images/191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB和CD都是⊙O的直径,∠AOC=50°,则∠C的度数是()\nChoices:\n(A) 20°\n(B) 25°\n(C) 30°\n(D) 50°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.53125, "loss": 2.6781973838806152 }, { "id": "identity_232951", "image": "A-OKVQA/images/176440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years after the large-scale scandal did the company face another significant issue if the next issue occurred in 2020?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 10.03125, "loss": 2.2613656520843506 }, { "id": "identity_104906", "image": "VQA-AS/images/19149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the rug on the floor?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 28.765625, "loss": 3.006864547729492 }, { "id": "identity_199484", "image": "VQA2.0/images/523617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the plane's landing gear extended or retracted?" }, { "from": "gpt", "value": "The answer is Extended" } ], "clip_score": 22.4375, "loss": 2.6968352794647217 }, { "id": "identity_178144", "image": "CLEVR-Math/images/CLEVR_train_065196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are shiny?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.75, "loss": 2.71286940574646 }, { "id": "identity_112242", "image": "Super-CLEVR/images/superCLEVR_new_000837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any bicycles with the same wheel color as the motorbike furthest from the camera?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.15625, "loss": 3.266688585281372 }, { "id": "identity_288442", "image": "IconQA/images/94026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of rectangles present? \n(A) 1\n(B) 3\n(C) 10\n(D) 4\n(E) 8" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 27.296875, "loss": 2.9640095233917236 }, { "id": "identity_336589", "image": "FigureQA/images/47091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Tomato?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.53125, "loss": 4.083392143249512 }, { "id": "identity_182600", "image": "VQA2.0/images/477392.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What traffic signal is displayed for the direction the carriage is coming from?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 17.25, "loss": 3.6141791343688965 }, { "id": "identity_245270", "image": "PlotQA/images/4997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many years is the percentage of employed females who are attending school greater than 24%, and what is the average percentage of employed females attending school for those years?" }, { "from": "gpt", "value": "The answer is 65%" } ], "clip_score": 37.625, "loss": 3.2936625480651855 }, { "id": "identity_106990", "image": "TextVQA/images/21efd8d0c42e3cfe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the logo on the shirt a circle or a different shape?" }, { "from": "gpt", "value": "The answer is Circle" } ], "clip_score": 25.640625, "loss": 3.366640090942383 }, { "id": "identity_240862", "image": "PMC-VQA/images/PMC8521568_Fig1_86301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the higher magnification images, which specific protein is being visualized, and what is the likely biological process or structure being studied?" }, { "from": "gpt", "value": "The answer is GFP-tagged protein localization in cellular structures or processes" } ], "clip_score": 29.109375, "loss": 2.948103666305542 }, { "id": "identity_81756", "image": "GeoQA+/images/5296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with AD bisecting angle BAC, if BD = 6, AF = 4, and CD = 3, what is the length of segment DE?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.796875, "loss": 2.284925937652588 }, { "id": "identity_66711", "image": "AI2D/images/137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the alignment of the Sun, Moon, and Earth during a new or full moon?" }, { "from": "gpt", "value": "The answer is Syzygy" } ], "clip_score": 32.0, "loss": 2.6613667011260986 }, { "id": "identity_74108", "image": "ScienceQA/images/2607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of plants can be identified in the image?" }, { "from": "gpt", "value": "The answer is An integer number representing the count of different plant types" } ], "clip_score": 19.046875, "loss": 3.713867425918579 }, { "id": "identity_86435", "image": "Geometry3K/images/885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the triangle?" }, { "from": "gpt", "value": "The answer is 97.43 or 97 (rounded to nearest integer)" } ], "clip_score": 30.359375, "loss": 2.3361988067626953 }, { "id": "identity_101050", "image": "CLEVR-Math/images/CLEVR_train_050665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are blue?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.4375, "loss": 2.47605299949646 }, { "id": "identity_12903", "image": "IconQA/images/93562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shapes are triangles?\nChoices:\n(A) 6/7\n(B) 6/8\n(C) 5/12\n(D) 8/12" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.625, "loss": 3.5055832862854004 }, { "id": "identity_61772", "image": "DocVQA/images/hycv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the event titled \"THE INFLUENCE OF ADVERTISING ON ALCOHOLISM\"?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 27.46875, "loss": 2.3811042308807373 }, { "id": "identity_93698", "image": "TabMWP/images/6342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the average per-student arts education budget over the five years shown in the table" }, { "from": "gpt", "value": "The answer is $42.8" } ], "clip_score": 32.09375, "loss": 2.1431143283843994 }, { "id": "identity_565", "image": "TQA/images/question_images/human_system_digestive_6089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the caecum and the rectum?\nChoices:\n(A) esophagus\n(B) small intestine\n(C) colon\n(D) stomach" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.640625, "loss": 4.154755592346191 }, { "id": "identity_124281", "image": "VQA2.0/images/547637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sport does the equipment in the image belong to?" }, { "from": "gpt", "value": "The answer is Baseball" } ], "clip_score": 30.046875, "loss": 3.4393470287323 }, { "id": "identity_208648", "image": "TextVQA/images/02250d9442586089.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many abilities are currently available for the character to use?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.578125, "loss": 2.0446815490722656 }, { "id": "identity_80809", "image": "GeoQA+/images/2615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ADE?" }, { "from": "gpt", "value": "The answer is 145 degrees" } ], "clip_score": 33.5, "loss": 2.1060383319854736 }, { "id": "identity_219819", "image": "PMC-VQA/images/PMC7287018_F1_189614.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is labeled with \"Hipp\" in the image?" }, { "from": "gpt", "value": "The answer is Hippocampus" } ], "clip_score": 32.03125, "loss": 1.5207868814468384 }, { "id": "identity_293465", "image": "DocVQA/images/xrcy0227_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the job title of Channing H. Lushbough?" }, { "from": "gpt", "value": "The answer is DIRECTOR& EXECUTIVE SECRETARY" } ], "clip_score": 22.765625, "loss": 2.8363442420959473 }, { "id": "identity_16148", "image": "DocVQA/images/xggn0226_39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the surgery mentioned in the Hip radiography routing log?" }, { "from": "gpt", "value": "The answer is 11 26 01" } ], "clip_score": 18.515625, "loss": 2.459444046020508 }, { "id": "identity_81161", "image": "GeoQA+/images/2936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the circumference of the base of the cone?" }, { "from": "gpt", "value": "The answer is C = 2 * π * 6 = 12π" } ], "clip_score": 33.3125, "loss": 1.716174840927124 }, { "id": "identity_288853", "image": "IconQA/images/45839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Estimate the length of the line using paper clips. The line is approximately (_) paper clips long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.03125, "loss": 2.413311243057251 }, { "id": "identity_64115", "image": "TQA/images/abc_question_images/faults_11738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of geological feature is likely to form at the boundary labeled K?" }, { "from": "gpt", "value": "The answer is Mid-ocean ridge or rift valley" } ], "clip_score": 27.703125, "loss": 2.494941234588623 }, { "id": "identity_102095", "image": "TabMWP/images/31899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more boys walk to school than ride the bus?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.21875, "loss": 2.242966651916504 }, { "id": "identity_137517", "image": "PlotQA/images/97793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Niger have the lowest CPIA financial sector rating?" }, { "from": "gpt", "value": "The answer is 2014" } ], "clip_score": 28.046875, "loss": 1.7333632707595825 }, { "id": "identity_192892", "image": "PMC-VQA/images/PMC8182051_f7_460808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of midline shift in the image?" }, { "from": "gpt", "value": "The answer is No midline shift" } ], "clip_score": 21.109375, "loss": 3.2433247566223145 }, { "id": "identity_109143", "image": "VQA2.0/images/470211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are parked in front of the cafe?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 27.046875, "loss": 1.9277399778366089 }, { "id": "identity_129338", "image": "PMC-VQA/images/PMC7178603_Fig2_157518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of a collapsed lung in the chest X-ray?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.75, "loss": 2.9235424995422363 }, { "id": "identity_198281", "image": "TextVQA/images/5065fa26c7b0eada.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 8.7578125, "loss": 2.5322582721710205 }, { "id": "identity_316925", "image": "VQA2.0/images/386016.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is this?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 18.375, "loss": 3.2629852294921875 }, { "id": "identity_216017", "image": "Super-CLEVR/images/superCLEVR_new_001582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle located at the far right?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 26.265625, "loss": 2.8982045650482178 }, { "id": "identity_212327", "image": "TextVQA/images/6598b3b3b68fe255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the watch?" }, { "from": "gpt", "value": "The answer is SEIKO" } ], "clip_score": 31.046875, "loss": 2.8169922828674316 }, { "id": "identity_199714", "image": "VQA-AS/images/14625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person on the left side of the image holding?" }, { "from": "gpt", "value": "The answer is A yellow bucket" } ], "clip_score": 23.09375, "loss": 4.079874515533447 }, { "id": "identity_101305", "image": "TabMWP/images/18222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a ticket for an Alaskan cruise cost compared to a ticket for an Atlantic cruise?" }, { "from": "gpt", "value": "The answer is $6,483" } ], "clip_score": 33.78125, "loss": 2.6325294971466064 }, { "id": "identity_119073", "image": "A-OKVQA/images/58651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible smoke coming from the locomotive?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 25.703125, "loss": 2.980713367462158 }, { "id": "identity_299949", "image": "DocVQA/images/xfhb0228_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the panel discussion scheduled to take place?" }, { "from": "gpt", "value": "The answer is 9:00 a.m." } ], "clip_score": 19.921875, "loss": 1.4435254335403442 }, { "id": "identity_165201", "image": "GeoQA+/images/3512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the rectangle ABCD, with AC = 8 and ∠ACB = 30°, what is the length of segment AE?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.75, "loss": 2.1922221183776855 }, { "id": "identity_214293", "image": "VizWiz/images/VizWiz_train_00011469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the background of the image?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 23.65625, "loss": 3.464683771133423 }, { "id": "identity_53864", "image": "ChartQA/images/multi_col_100908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many age groups have a higher 'Linear and timeshift viewing to the TV set' compared to 'Connected viewing to other screens'?" }, { "from": "gpt", "value": "The answer is 5 age groups" } ], "clip_score": 32.46875, "loss": 2.5886881351470947 }, { "id": "identity_92367", "image": "TabMWP/images/16636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the symphony wants to increase the number of silver donors to equal the current number of bronze donors, how many more silver donors are needed?" }, { "from": "gpt", "value": "The answer is 31" } ], "clip_score": 37.375, "loss": 2.97141432762146 }, { "id": "identity_256077", "image": "PMC-VQA/images/PMC8106184_Fig1_435979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the comparative density difference between the greater trochanter of the left femur and the lesser trochanter of the right femur in the preoperative X-ray examination of the girl?" }, { "from": "gpt", "value": "The answer is Greater trochanter of left femur has increased density compared to lesser trochanter of right femur" } ], "clip_score": 31.046875, "loss": 1.3088942766189575 }, { "id": "identity_257456", "image": "MapQA/images/map_1872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states fall into the second smallest category, and what percentage of the total number of states does this represent?" }, { "from": "gpt", "value": "The answer is 11 states, 22%" } ], "clip_score": 27.078125, "loss": 2.2019295692443848 }, { "id": "identity_240822", "image": "A-OKVQA/images/478575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the founder of the company that manufactures the motorcycle shown in the image, and in what year was the company founded?" }, { "from": "gpt", "value": "The answer is William S. Harley and Arthur Davidson, 1903" } ], "clip_score": 20.921875, "loss": 1.4212403297424316 }, { "id": "identity_18088", "image": "FigureQA/images/56967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue Violet the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.71875, "loss": 3.889136552810669 }, { "id": "identity_118422", "image": "Super-CLEVR/images/superCLEVR_new_000656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more purple objects than green objects in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.140625, "loss": 3.144768476486206 }, { "id": "identity_44422", "image": "DVQA/images/bar_train_00177063.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy for the 'guest' dataset?" }, { "from": "gpt", "value": "The answer is bone" } ], "clip_score": 33.625, "loss": 3.468773126602173 }, { "id": "identity_70153", "image": "ScienceQA/images/13385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible faucet or water source connected to the bathtub in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.59375, "loss": 2.7573437690734863 }, { "id": "identity_246744", "image": "DocVQA/images/ntlg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount authorized for Frances Ellen Brock, and what is the percentage per account?" }, { "from": "gpt", "value": "The answer is 465.00 and 100.00" } ], "clip_score": 22.25, "loss": 1.7812546491622925 }, { "id": "identity_14808", "image": "VQA2.0/images/126096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it a sunny day?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.1640625, "loss": 3.392826795578003 }, { "id": "identity_64181", "image": "AI2D/images/3665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many atria are present in the human heart?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 30.375, "loss": 3.838426113128662 }, { "id": "identity_177974", "image": "TabMWP/images/30453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of hammers sold in the given days?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 39.03125, "loss": 2.7458975315093994 }, { "id": "identity_141375", "image": "DocVQA/images/jnfx0078_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between total assets and total liabilities?" }, { "from": "gpt", "value": "The answer is $(14,241,488 - 15,357,297) = -$1,115,809" } ], "clip_score": 22.25, "loss": 1.6349496841430664 }, { "id": "identity_127274", "image": "A-OKVQA/images/30565.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Partly cloudy" } ], "clip_score": 12.7421875, "loss": 2.3139612674713135 }, { "id": "identity_272783", "image": "TextVQA/images/301ea0c8391e5bbe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand is displayed on these bottles?" }, { "from": "gpt", "value": "The answer is boylan" } ], "clip_score": 26.375, "loss": 2.9640886783599854 }, { "id": "identity_180806", "image": "CLEVR-Math/images/CLEVR_train_035616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects have a metallic texture?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.734375, "loss": 2.7403926849365234 }, { "id": "identity_128532", "image": "A-OKVQA/images/567142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person standing on the paddleboard engaged in?" }, { "from": "gpt", "value": "The answer is Paddleboarding" } ], "clip_score": 24.21875, "loss": 1.9009612798690796 }, { "id": "identity_64780", "image": "ScienceQA/images/16512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Lacey and Akira each have one item the other wants, what could they trade?" }, { "from": "gpt", "value": "The answer is Lacey trades oranges for almonds" } ], "clip_score": 25.046875, "loss": 2.5850329399108887 }, { "id": "identity_302394", "image": "UniGeo/images/2798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,点C和D都在⊙O上,连结CA、CB、DC、DB。已知∠D=30°,BC=3,则AB的长度是()\n选项:\n(A) 5\n(B) 3√{2}\n(C) 2√{3}\n(D) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.46875, "loss": 2.452584981918335 }, { "id": "identity_188119", "image": "TextVQA/images/bc486e9b532b7a06.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the network the iPhone is connected to?" }, { "from": "gpt", "value": "The answer is BITE LV" } ], "clip_score": 23.75, "loss": 4.959737300872803 }, { "id": "identity_203850", "image": "VQA2.0/images/11256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person doing?" }, { "from": "gpt", "value": "The answer is Lying down on a bench" } ], "clip_score": 26.21875, "loss": 2.4642131328582764 }, { "id": "identity_20581", "image": "A-OKVQA/images/248709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The book was based on a series of what by the author?\nChoices:\n(A) songs\n(B) seminars\n(C) dreams\n(D) sermons" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.578125, "loss": 4.1176018714904785 }, { "id": "identity_237801", "image": "FigureQA/images/24791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Arrange the colors Blue, Medium Blue, and Dark Blue in ascending order of their brightness" }, { "from": "gpt", "value": "The answer is Dark Blue, Blue, Medium Blue" } ], "clip_score": 36.03125, "loss": 1.2717938423156738 }, { "id": "identity_43136", "image": "DVQA/images/bar_train_00198730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any dataset that has a consistent accuracy value across all algorithms?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.3125, "loss": 3.0842337608337402 }, { "id": "identity_201238", "image": "PMC-VQA/images/PMC6636264_f3_10016.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of honeycombing in the lung fields?" }, { "from": "gpt", "value": "The answer is Yes, evidence of honeycombing present" } ], "clip_score": 31.015625, "loss": 2.845594882965088 }, { "id": "identity_24887", "image": "PMC-VQA/images/PMC4502274_fig2_405946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being compressed by the hiatal hernia in the image? \nChoices:\n(A) Right atrium and left pulmonary vein \n(B) Left atrium and right pulmonary vein \n(C) Right ventricle and left atrium \n(D) Left ventricle and right atrium " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.796875, "loss": 4.189347743988037 }, { "id": "identity_47134", "image": "DocVQA/images/myhw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the affiliation of Thomas W. Christopher?" }, { "from": "gpt", "value": "The answer is University of Alabama" } ], "clip_score": 19.6875, "loss": 3.6415085792541504 }, { "id": "identity_126128", "image": "A-OKVQA/images/507551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the batter's helmet?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.140625, "loss": 3.4486536979675293 }, { "id": "identity_214677", "image": "A-OKVQA/images/268590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible in the hallway?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.3125, "loss": 4.314634323120117 }, { "id": "identity_106274", "image": "A-OKVQA/images/421978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What beverage company is advertised on the vehicle?" }, { "from": "gpt", "value": "The answer is Heineken" } ], "clip_score": 18.34375, "loss": 1.7576301097869873 }, { "id": "identity_319335", "image": "UniGeo/images/2988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A chair is tilted due to damage, and its shape from the back is shown in the figure. The parallel relationship between two pairs of opposite sides remains unchanged. If ∠1 = 76°, what is the measure of ∠2?\n\nChoices:\n(A) 76°\n(B) 86°\n(C) 104°\n(D) 114°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.421875, "loss": 2.974621534347534 }, { "id": "identity_209907", "image": "VQA2.0/images/182436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals interacting with each other?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.203125, "loss": 2.718618869781494 }, { "id": "identity_168346", "image": "TabMWP/images/13026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of customers that visited the store from Wednesday to Saturday?" }, { "from": "gpt", "value": "The answer is 1,852.5" } ], "clip_score": 29.3125, "loss": 2.1422767639160156 }, { "id": "identity_235381", "image": "A-OKVQA/images/296851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main features of the mode of transport shown in the picture, and how do these features differentiate it from other modes of transport?" }, { "from": "gpt", "value": "The answer is Operates on water, uses vessels, floats, and navigates on water" } ], "clip_score": 18.78125, "loss": 3.868560791015625 }, { "id": "identity_314725", "image": "ChartQA/images/95901028005117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year is the difference between the values of the two lines smallest?" }, { "from": "gpt", "value": "The answer is 1990" } ], "clip_score": 13.1953125, "loss": 2.1483561992645264 }, { "id": "identity_280190", "image": "PMC-VQA/images/PMC7943690_Fig5_387186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What software was utilized for visualizing the images among the following options: Visual Studio, AW server station, Photoshop, and AutoCAD?\nChoices:\n(A) Visual Studio \n(B) AW server station \n(C) Photoshop \n(D) AutoCAD" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.1796875, "loss": 4.510939598083496 }, { "id": "identity_200678", "image": "PMC-VQA/images/PMC6108968_Fig4_358750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.65625, "loss": 2.7141177654266357 }, { "id": "identity_294742", "image": "IconQA/images/71628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What operation has been performed on this letter? \nChoices:\n(A) flip\n(B) slide\n(C) turn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.65625, "loss": 4.521323204040527 }, { "id": "identity_186656", "image": "A-OKVQA/images/187852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bag being held in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 23.765625, "loss": 2.2731897830963135 }, { "id": "identity_222853", "image": "A-OKVQA/images/400045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the laptop?" }, { "from": "gpt", "value": "The answer is Apple" } ], "clip_score": 22.578125, "loss": 3.2056281566619873 }, { "id": "identity_137731", "image": "ChartQA/images/6530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year was the satisfaction rate exactly 30%?" }, { "from": "gpt", "value": "The answer is 2005" } ], "clip_score": 17.984375, "loss": 1.9342913627624512 }, { "id": "identity_177921", "image": "IconQA/images/7625.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the time displayed in the morning or in the evening?" }, { "from": "gpt", "value": "The answer is Morning" } ], "clip_score": 27.96875, "loss": 3.1299257278442383 }, { "id": "identity_69035", "image": "ScienceQA/images/2658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the poles in the bottom magnet?" }, { "from": "gpt", "value": "The answer is South pole on top, North pole on bottom" } ], "clip_score": 20.03125, "loss": 2.4784581661224365 }, { "id": "identity_319160", "image": "IconQA/images/42027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this?\nChoices:\n(A) turn\n(B) slide\n(C) flip" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.9375, "loss": 3.6958110332489014 }, { "id": "identity_82485", "image": "Geometry3K/images/2001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If m∠GHJ = 114, find the measure of m∠GJH" }, { "from": "gpt", "value": "The answer is 66 degrees" } ], "clip_score": 33.84375, "loss": 2.06622052192688 }, { "id": "identity_324303", "image": "GeoQA+/images/5083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,若AP=PB,∠APB=2∠ACB,AC与PB交于点D,且PB=5,PD=3,则AD•DC等于多少?\nChoices:\n(A) 16\n(B) 15\n(C) 7\n(D) 6" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.75, "loss": 2.287266492843628 }, { "id": "identity_38969", "image": "DVQA/images/bar_train_00174916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in values between 'animal' and 'back' for the category 'half'?" }, { "from": "gpt", "value": "The answer is approximately 10^7" } ], "clip_score": 29.09375, "loss": 3.2564330101013184 }, { "id": "identity_174566", "image": "TabMWP/images/27968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Lionel's total earnings went to Federal income tax?" }, { "from": "gpt", "value": "The answer is 12.73%" } ], "clip_score": 30.859375, "loss": 1.5747462511062622 }, { "id": "identity_129346", "image": "A-OKVQA/images/374818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the airline written on the airplane in the foreground?" }, { "from": "gpt", "value": "The answer is Nok Air" } ], "clip_score": 27.421875, "loss": 2.134636163711548 }, { "id": "identity_252187", "image": "PMC-VQA/images/PMC3128619_pone-0021857-g007_101314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the images in Panels A and B, which mouse (Mouse 1 or Mouse 2) shows a more significant reduction in tumor size post-treatment, and by what percentage did the tumor size reduce for that mouse?" }, { "from": "gpt", "value": "The answer is Mouse 1, 60%" } ], "clip_score": 29.21875, "loss": 2.6961376667022705 }, { "id": "identity_145874", "image": "DVQA/images/bar_train_00070215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of accuracy values for the 'author' dataset across all algorithms?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.75, "loss": 3.44915509223938 }, { "id": "identity_209262", "image": "A-OKVQA/images/532976.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the umbrella attached to the yellow truck?" }, { "from": "gpt", "value": "The answer is Yellow and Red" } ], "clip_score": 22.84375, "loss": 2.8895390033721924 }, { "id": "identity_327764", "image": "TQA/images/question_images/solar_system_6296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which planet is nearest to the sun?\nChoices:\n(A) neptune\n(B) venus\n(C) mars\n(D) mercury" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.28125, "loss": 3.3687477111816406 }, { "id": "identity_65859", "image": "TQA/images/question_images/lewis_dot_diagrams_9148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bond is represented by the triple line in the chemical structure of Acetylene?" }, { "from": "gpt", "value": "The answer is A triple bond" } ], "clip_score": 23.171875, "loss": 2.9441299438476562 }, { "id": "identity_70538", "image": "ScienceQA/images/13411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the food web is a tertiary consumer?" }, { "from": "gpt", "value": "The answer is Black bear" } ], "clip_score": 32.34375, "loss": 2.8430347442626953 }, { "id": "identity_5492", "image": "PlotQA/images/93191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and the lowest percentage of broad money claimed on domestic economy in Cabo Verde ?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.484375, "loss": 3.4567174911499023 }, { "id": "identity_318158", "image": "VQA2.0/images/34624.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pictures are there on this wall?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 17.890625, "loss": 2.3028180599212646 }, { "id": "identity_102752", "image": "IconQA/images/66297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more acorns than squirrels?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.4375, "loss": 2.8994839191436768 }, { "id": "identity_77742", "image": "TQA/images/question_images/chemical_bonding_ionic_9072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bond is formed between the Sodium atom and the Chlorine atom?" }, { "from": "gpt", "value": "The answer is Ionic bond" } ], "clip_score": 32.625, "loss": 2.008373737335205 }, { "id": "identity_65356", "image": "AI2D/images/3084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the \"golgi\" structure do within the cell?" }, { "from": "gpt", "value": "The answer is Modifies, sorts, and packages proteins and lipids" } ], "clip_score": 26.734375, "loss": 1.6456257104873657 }, { "id": "identity_133508", "image": "Super-CLEVR/images/superCLEVR_new_000045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more two-wheeled or four-wheeled vehicles in the image?" }, { "from": "gpt", "value": "The answer is Four-wheeled" } ], "clip_score": 22.15625, "loss": 1.7951691150665283 }, { "id": "identity_45643", "image": "FigureQA/images/56647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Brown section in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 45%" } ], "clip_score": 34.09375, "loss": 2.180938243865967 }, { "id": "identity_252863", "image": "A-OKVQA/images/294814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are shown in the image, and what type are they?" }, { "from": "gpt", "value": "The answer is Two horses" } ], "clip_score": 23.4375, "loss": 4.622837066650391 }, { "id": "identity_219066", "image": "A-OKVQA/images/553879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.828125, "loss": 3.10568904876709 }, { "id": "identity_163641", "image": "GeoQA+/images/3036.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line a is parallel to line b, and angle 1 is 60 degrees, what is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 32.28125, "loss": 1.6768717765808105 }, { "id": "identity_278695", "image": "DVQA/images/bar_train_00040023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the scale used in the chart logarithmic?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.609375, "loss": 3.6359810829162598 }, { "id": "identity_128313", "image": "Super-CLEVR/images/superCLEVR_new_001988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.59375, "loss": 3.873047351837158 }, { "id": "identity_166078", "image": "TabMWP/images/36523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Scooter City sold 5 more scooters, how many would they have sold in total?" }, { "from": "gpt", "value": "The answer is 78" } ], "clip_score": 39.25, "loss": 2.666748523712158 }, { "id": "identity_125975", "image": "VQA-AS/images/14061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bird is flying in the sky?" }, { "from": "gpt", "value": "The answer is A bald eagle" } ], "clip_score": 21.5, "loss": 2.844104051589966 }, { "id": "identity_136824", "image": "FigureQA/images/80362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Navy Blue data points?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 26.0, "loss": 2.786702871322632 }, { "id": "identity_199924", "image": "TextVQA/images/0c9cfd577d3a0f1a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible branding or logos in the image?" }, { "from": "gpt", "value": "The answer is Yes, \"CHURROS\" branding is visible" } ], "clip_score": 22.984375, "loss": 2.2841391563415527 }, { "id": "identity_330671", "image": "VQA2.0/images/459763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is in there?" }, { "from": "gpt", "value": "The answer is boats" } ], "clip_score": 20.25, "loss": 3.4999988079071045 }, { "id": "identity_146234", "image": "DocVQA/images/pkxg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Regional Health Director?" }, { "from": "gpt", "value": "The answer is Joseph P. Garen, M.D" } ], "clip_score": 19.671875, "loss": 1.812179684638977 }, { "id": "identity_86321", "image": "Geometry3K/images/1082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle GHK in the circle?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 37.0, "loss": 1.9582688808441162 }, { "id": "identity_156523", "image": "ScienceQA/images/18345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how many degrees Celsius did the clear bottle's temperature increase?" }, { "from": "gpt", "value": "The answer is 3°C" } ], "clip_score": 36.21875, "loss": 2.1488687992095947 }, { "id": "identity_62706", "image": "AI2D/images/3235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"Réticulum endoplasmique rugueux\" refer to?" }, { "from": "gpt", "value": "The answer is Rough endoplasmic reticulum" } ], "clip_score": 28.296875, "loss": 1.297142505645752 }, { "id": "identity_107173", "image": "PMC-VQA/images/PMC6945249_f1_94918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the ultrasound probe in relation to the body in this image?" }, { "from": "gpt", "value": "The answer is Perpendicular to the body surface" } ], "clip_score": 32.28125, "loss": 2.67915940284729 }, { "id": "identity_211613", "image": "VQA2.0/images/165126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the train cars?" }, { "from": "gpt", "value": "The answer is The train cars are brown" } ], "clip_score": 21.53125, "loss": 2.9393391609191895 }, { "id": "identity_301618", "image": "PMC-VQA/images/PMC7027275_Fig5_120050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted on the left side of the images?\nChoices:\n(A) Dentin\n(B) Enamel\n(C) Both dentin and enamel\n(D) Neither dentin nor enamel" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.890625, "loss": 4.8243231773376465 }, { "id": "identity_153717", "image": "ScienceQA/images/18545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which solution has a lower concentration of yellow particles?" }, { "from": "gpt", "value": "The answer is Solution A" } ], "clip_score": 32.53125, "loss": 2.232395887374878 }, { "id": "identity_7818", "image": "FigureQA/images/49887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Royal Blue the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.3125, "loss": 3.729081630706787 }, { "id": "identity_107991", "image": "PMC-VQA/images/PMC4964051_Fig2_56474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique was likely used for this image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) stain" } ], "clip_score": 28.21875, "loss": 1.0598678588867188 }, { "id": "identity_311098", "image": "PMC-VQA/images/PMC9044620_Fig1_266346.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is observed in the right lower lobe?\nChoices:\n(A) Fluid-filled cysts \n(B) Semi-solid ground-glass attenuation \n(C) Calcified nodules \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.28125, "loss": 4.487159252166748 }, { "id": "identity_207980", "image": "PMC-VQA/images/PMC3311096_F6_131324.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the structures in the image single or clustered?" }, { "from": "gpt", "value": "The answer is Clustered" } ], "clip_score": 19.703125, "loss": 2.4741127490997314 }, { "id": "identity_149191", "image": "DocVQA/images/llwc0228_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company that submitted the sample?" }, { "from": "gpt", "value": "The answer is The Pillsbury Company" } ], "clip_score": 21.09375, "loss": 3.837242603302002 }, { "id": "identity_60032", "image": "FigureQA/images/74983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Dark Turquoise line ever above the 84 mark on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.9375, "loss": 3.1453640460968018 }, { "id": "identity_38278", "image": "DVQA/images/bar_train_00023903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chart contain any negative values?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.015625, "loss": 3.813366651535034 }, { "id": "identity_154594", "image": "ScienceQA/images/18917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clouds can be seen in the image above the mountain?" }, { "from": "gpt", "value": "The answer is Cumulus clouds" } ], "clip_score": 23.390625, "loss": 2.2561097145080566 }, { "id": "identity_328561", "image": "A-OKVQA/images/14321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is to the left of these motorcycles?\nChoices:\n(A) dog\n(B) boat\n(C) cat\n(D) person" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.421875, "loss": 4.323548316955566 }, { "id": "identity_76196", "image": "TQA/images/abc_question_images/hair_follicles_16989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the part labeled 'R' in the diagram?" }, { "from": "gpt", "value": "The answer is Support or base" } ], "clip_score": 26.65625, "loss": 5.053623199462891 }, { "id": "identity_245470", "image": "DVQA/images/bar_train_00086229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of home and drive in radar and travel combined?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 25.0625, "loss": 2.3028030395507812 }, { "id": "identity_196359", "image": "TextVQA/images/2caf2c5eb4a6da24.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What creature is depicted on the back of the green car?" }, { "from": "gpt", "value": "The answer is A dinosaur" } ], "clip_score": 23.78125, "loss": 3.0418217182159424 }, { "id": "identity_178612", "image": "IconQA/images/20502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of marbles in the image?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 23.90625, "loss": 1.759756088256836 }, { "id": "identity_213756", "image": "A-OKVQA/images/311726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What protective structure is above the figure in the chariot?" }, { "from": "gpt", "value": "The answer is A large umbrella or canopy" } ], "clip_score": 27.203125, "loss": 2.5776283740997314 }, { "id": "identity_121865", "image": "VQA2.0/images/225717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What mode of transportation is shown on the left side of the image?" }, { "from": "gpt", "value": "The answer is Horse-drawn carriage" } ], "clip_score": 23.875, "loss": 2.6209025382995605 }, { "id": "identity_230587", "image": "GeoQA+/images/1823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A,B,C是⊙O上的三点,已知∠AOB=110°,且弧AC的度数是80°,那么∠ACB的度数是()" }, { "from": "gpt", "value": "The answer is 55°" } ], "clip_score": 30.796875, "loss": 2.129532814025879 }, { "id": "identity_147699", "image": "DocVQA/images/szcg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the stipend offered for the program?" }, { "from": "gpt", "value": "The answer is $855 to $900" } ], "clip_score": 20.109375, "loss": 2.1588358879089355 }, { "id": "identity_189380", "image": "VQA2.0/images/225104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more male or female spectators in the image?" }, { "from": "gpt", "value": "The answer is More male spectators" } ], "clip_score": 20.484375, "loss": 2.4770960807800293 }, { "id": "identity_36033", "image": "IconQA/images/71406.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which color is the spinner more likely to land?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.125, "loss": 4.507983684539795 }, { "id": "identity_276850", "image": "FigureQA/images/58162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru not the minimum?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.609375, "loss": 3.711411952972412 }, { "id": "identity_135486", "image": "MapQA/images/map_8135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of savings for the state with the lightest color shade?" }, { "from": "gpt", "value": "The answer is 274045-365327" } ], "clip_score": 29.671875, "loss": 2.281646251678467 }, { "id": "identity_147482", "image": "DocVQA/images/ykdv0228_37.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the organization the letter is addressed to?" }, { "from": "gpt", "value": "The answer is The Nutrition Foundation, Inc" } ], "clip_score": 20.859375, "loss": 1.7405729293823242 }, { "id": "identity_298186", "image": "ChartQA/images/4522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the Republican Party?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 20.828125, "loss": 4.26150369644165 }, { "id": "identity_71399", "image": "ScienceQA/images/4490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the molecule that is composed of only one type of element" }, { "from": "gpt", "value": "The answer is iodine" } ], "clip_score": 29.875, "loss": 1.7464243173599243 }, { "id": "identity_117149", "image": "PMC-VQA/images/PMC7287056_Fig2_189661.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the 'photo pin B' in relation to the 2000 AD caldera?" }, { "from": "gpt", "value": "The answer is Southeast of the caldera" } ], "clip_score": 32.25, "loss": 2.43005108833313 }, { "id": "identity_23518", "image": "PMC-VQA/images/PMC6441667_Fig7_459427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of radiograph was obtained following right mandibular resection? \nChoices:\n(A) Periapical radiograph \n(B) Panoramic radiograph \n(C) Bite-wing radiograph \n(D) Occlusal radiograph " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.640625, "loss": 3.7952511310577393 }, { "id": "identity_158081", "image": "AI2D/images/4329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fruits in the image are shown in a cross-sectional view?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.484375, "loss": 2.585908889770508 }, { "id": "identity_295735", "image": "IconQA/images/17121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there a sufficient number of knives for each plate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.328125, "loss": 3.5451247692108154 }, { "id": "identity_223838", "image": "TabMWP/images/1867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean number of states visited by the students in Zoe's class?" }, { "from": "gpt", "value": "The answer is approximately 10.57" } ], "clip_score": 31.75, "loss": 2.303962469100952 }, { "id": "identity_62128", "image": "DocVQA/images/qpvw0217_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What journal is the article intended for publication?" }, { "from": "gpt", "value": "The answer is American Journal of Obstetrics and Gynecology" } ], "clip_score": 23.390625, "loss": 1.475471019744873 }, { "id": "identity_39301", "image": "DocVQA/images/gzbn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of publication for the second blog post?" }, { "from": "gpt", "value": "The answer is September 16, 2015" } ], "clip_score": 18.6875, "loss": 1.4596105813980103 }, { "id": "identity_225735", "image": "A-OKVQA/images/100581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image, and what kind of animal is the shape of the kite made into?" }, { "from": "gpt", "value": "The answer is six people, reptile" } ], "clip_score": 27.84375, "loss": 3.816856861114502 }, { "id": "identity_331459", "image": "DVQA/images/bar_train_00093271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.34375, "loss": 4.076539039611816 }, { "id": "identity_118388", "image": "A-OKVQA/images/257830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the sky in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 15.515625, "loss": 3.6476645469665527 }, { "id": "identity_190901", "image": "A-OKVQA/images/539299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animals are predominantly in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Sheep" } ], "clip_score": 25.15625, "loss": 2.813314437866211 }, { "id": "identity_184025", "image": "PMC-VQA/images/PMC3483267_pone-0048512-g005_162432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What magnification is likely used to capture this image?" }, { "from": "gpt", "value": "The answer is 400x magnification" } ], "clip_score": 26.125, "loss": 1.8315562009811401 }, { "id": "identity_315387", "image": "VQA2.0/images/168558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this?" }, { "from": "gpt", "value": "The answer is sky" } ], "clip_score": 20.3125, "loss": 3.3075859546661377 }, { "id": "identity_93330", "image": "TabMWP/images/14865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a contestant played an acoustic guitar given they were dressed in velvet?" }, { "from": "gpt", "value": "The answer is 2 / (2 + 3) = 2/5" } ], "clip_score": 36.25, "loss": 1.2940562963485718 }, { "id": "identity_282941", "image": "IconQA/images/92038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of shapes available to choose from?\nChoices:\n(A) 2\n(B) 1\n(C) 4\n(D) 5\n(E) 3" }, { "from": "gpt", "value": "The answer is E" } ], "clip_score": 24.53125, "loss": 3.5444130897521973 }, { "id": "identity_276213", "image": "TextVQA/images/1dac470833f83d4c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand is displayed here?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 13.6015625, "loss": 3.9250404834747314 }, { "id": "identity_233055", "image": "TabMWP/images/27828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more miles did Harry fly compared to the combined total miles flown by Trent and Darren?" }, { "from": "gpt", "value": "The answer is 206" } ], "clip_score": 33.8125, "loss": 2.3703274726867676 }, { "id": "identity_269909", "image": "GeoQA+/images/4393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, PA and PB are tangents to circle O, with points of tangency at A and B. If ∠P = 60°, what is the measure of ∠AOB? \n(A) 60°\n(B) 90°\n(C) 120°\n(D) 150°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.9375, "loss": 2.4083986282348633 }, { "id": "identity_38279", "image": "ScienceQA/images/8345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which term matches the picture?\nChoices:\n(A) endotherms\n(B) ectotherms" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 10.6484375, "loss": 4.498591899871826 }, { "id": "identity_25176", "image": "PMC-VQA/images/PMC8155587_F3_453027.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the MRI of the boy with Canavan disease show? \nChoices:\n(A) Normal MRI \n(B) Abnormalities in cortical region only \n(C) Abnormalities in basal ganglia only \n(D) Abnormalities in deep and periventricular white matter as well as basal ganglia, thalami and posterior portion of the corpus callosum. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.6875, "loss": 3.1996915340423584 }, { "id": "identity_88659", "image": "UniGeo/images/345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment OP?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.890625, "loss": 2.482869863510132 }, { "id": "identity_186249", "image": "PMC-VQA/images/PMC7719498_fig1_315933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any bone abnormalities be observed in this image?" }, { "from": "gpt", "value": "The answer is No bone abnormalities observed" } ], "clip_score": 27.34375, "loss": 2.592996835708618 }, { "id": "identity_104385", "image": "VQA2.0/images/561369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buttons are on the knitted object?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 25.953125, "loss": 4.218502998352051 }, { "id": "identity_63275", "image": "AI2D/images/3797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What function does the structure labeled \"A\" perform in the plant?" }, { "from": "gpt", "value": "The answer is Photosynthesis" } ], "clip_score": 32.21875, "loss": 2.4780755043029785 }, { "id": "identity_255949", "image": "IconQA/images/10968.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the letter is flipped horizontally and then rotated 90 degrees clockwise, what will be the final orientation of the letter?" }, { "from": "gpt", "value": "The answer is upside-down 'L'" } ], "clip_score": 24.4375, "loss": 2.755682945251465 }, { "id": "identity_244037", "image": "GeoQA+/images/877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC内接于⊙O,若∠OAB=28°,且∠OBC=34°,则∠C的大小为多少度?" }, { "from": "gpt", "value": "The answer is 118°" } ], "clip_score": 33.90625, "loss": 2.177316427230835 }, { "id": "identity_37541", "image": "TabMWP/images/3623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Martha need to buy a garnet ring, a jade bracelet, and a pearl necklace?" }, { "from": "gpt", "value": "The answer is 2,070" } ], "clip_score": 28.796875, "loss": 2.665640354156494 }, { "id": "identity_327777", "image": "IconQA/images/20529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is there symmetry in this image?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.171875, "loss": 3.534846544265747 }, { "id": "identity_47767", "image": "FigureQA/images/74056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Light Seafoam ever surpass the value of 50 on the y-axis label?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.015625, "loss": 2.8230202198028564 }, { "id": "identity_174358", "image": "IconQA/images/38665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is divided into 8 equal sections, what would be the new probability of landing on green?" }, { "from": "gpt", "value": "The answer is 1/8" } ], "clip_score": 27.65625, "loss": 1.4646508693695068 }, { "id": "identity_132069", "image": "VQA2.0/images/333684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the vehicle in the background?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 12.3671875, "loss": 3.1315009593963623 }, { "id": "identity_221773", "image": "TextVQA/images/6c9d32999224c283.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the wine bottle in milliliters?" }, { "from": "gpt", "value": "The answer is 750ml" } ], "clip_score": 23.171875, "loss": 1.7284704446792603 }, { "id": "identity_139551", "image": "PlotQA/images/157022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars are there in the graph?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.6875, "loss": 2.233633041381836 }, { "id": "identity_284019", "image": "GeoQA+/images/5643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, where angle BAC is 90 degrees, AB is 6, BC is 10, AD and AE are the angle bisectors and medians respectively. BG is perpendicular to AD at point B, intersecting AC at F, and EG is connected. What is the length of segment EG?\nChoices:\n(A) 1/2\n(B) 1\n(C) 3/2\n(D) 2" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.78125, "loss": 2.5218372344970703 }, { "id": "identity_256026", "image": "UniGeo/images/2775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,▱ABCD的一边AB为直径的⊙O过点C,若∠AOC=70°,求∠BCD的度数。" }, { "from": "gpt", "value": "The answer is 130°" } ], "clip_score": 32.09375, "loss": 1.5189663171768188 }, { "id": "identity_217096", "image": "VizWiz/images/VizWiz_train_00023875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the web address for the special promotion mentioned in the image?" }, { "from": "gpt", "value": "The answer is www.koingosw.com/promos/appyfridays.mc/" } ], "clip_score": 25.5, "loss": 3.4072458744049072 }, { "id": "identity_83510", "image": "Geometry3K/images/1877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AHE?" }, { "from": "gpt", "value": "The answer is 140 degrees" } ], "clip_score": 27.78125, "loss": 1.9927136898040771 }, { "id": "identity_167222", "image": "IconQA/images/49412.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you group the hearts into pairs, how many pairs can you make?" }, { "from": "gpt", "value": "The answer is 4 pairs and 1 heart left" } ], "clip_score": 30.453125, "loss": 2.545351982116699 }, { "id": "identity_171924", "image": "TabMWP/images/2722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Janessa's other taxes include Social Security tax at 6.2% and Medicare tax at 1.45%, how much did she pay for each of these taxes?" }, { "from": "gpt", "value": "The answer is Social Security: $299.58, Medicare: $70.16" } ], "clip_score": 31.171875, "loss": 1.873289942741394 }, { "id": "identity_32890", "image": "A-OKVQA/images/443805.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is on the clock?\nChoices:\n(A) cat\n(B) cherub statues\n(C) gravy\n(D) hat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.453125, "loss": 4.258047580718994 }, { "id": "identity_226487", "image": "VizWiz/images/VizWiz_train_00019583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the colors of the lizards and how many legs do they have in total?" }, { "from": "gpt", "value": "The answer is Orange and green, 8 legs" } ], "clip_score": 33.875, "loss": 2.6670403480529785 }, { "id": "identity_41763", "image": "DocVQA/images/fnyp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the letter addressed to?" }, { "from": "gpt", "value": "The answer is Dr. Darby" } ], "clip_score": 23.265625, "loss": 2.73471999168396 }, { "id": "identity_222337", "image": "A-OKVQA/images/402967.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation hub is depicted in the image?" }, { "from": "gpt", "value": "The answer is Bus terminal" } ], "clip_score": 28.40625, "loss": 3.8175742626190186 }, { "id": "identity_240054", "image": "TabMWP/images/30217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total increase in cheese consumption per capita from 1996 to 2006 for all the countries listed in the table?" }, { "from": "gpt", "value": "The answer is 16 kg" } ], "clip_score": 35.90625, "loss": 2.0879809856414795 }, { "id": "identity_225126", "image": "VQA2.0/images/366204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the plane's wings and body, and how many propellers does it have?" }, { "from": "gpt", "value": "The answer is The plane is white and has one propeller" } ], "clip_score": 26.46875, "loss": 2.4642693996429443 }, { "id": "identity_84147", "image": "Geometry3K/images/342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ADC?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 32.4375, "loss": 1.910592794418335 }, { "id": "identity_118637", "image": "Super-CLEVR/images/superCLEVR_new_000169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Green and yellow" } ], "clip_score": 22.546875, "loss": 3.8819713592529297 }, { "id": "identity_126485", "image": "Super-CLEVR/images/superCLEVR_new_001862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any vehicle in the image that has the same color as the scooter in the foreground?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.875, "loss": 3.2455191612243652 }, { "id": "identity_40903", "image": "FigureQA/images/75970.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Orchid ever surpass the value of 99 on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.4375, "loss": 3.02030873298645 }, { "id": "identity_156468", "image": "AI2D/images/4051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label for the layer that is in a liquid state?" }, { "from": "gpt", "value": "The answer is Molten outer core" } ], "clip_score": 21.03125, "loss": 2.5052552223205566 }, { "id": "identity_223279", "image": "A-OKVQA/images/21926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the airplane-like attraction?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 26.46875, "loss": 3.0506367683410645 }, { "id": "identity_307985", "image": "TextVQA/images/01538f3ef5df35c8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the front of the green object say?" }, { "from": "gpt", "value": "The answer is coney island" } ], "clip_score": 18.984375, "loss": 3.2929813861846924 }, { "id": "identity_103374", "image": "A-OKVQA/images/208511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the catcher wearing any protective gear?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.234375, "loss": 3.148632526397705 }, { "id": "identity_38402", "image": "DVQA/images/bar_train_00163268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 20.734375, "loss": 3.611550807952881 }, { "id": "identity_195871", "image": "PMC-VQA/images/PMC6929430_Fig1_90129.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled LV in the image?" }, { "from": "gpt", "value": "The answer is Pumps oxygenated blood to the body" } ], "clip_score": 22.5, "loss": 2.5038294792175293 }, { "id": "identity_47907", "image": "DVQA/images/bar_train_00104859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the 'future' store have any item that sold less than 2 units?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.84375, "loss": 3.0772743225097656 }, { "id": "identity_258694", "image": "A-OKVQA/images/151371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing features of the truck that indicate it is manufactured by Mack, and how do these features compare to those of trucks manufactured by Volvo, Peterbilt, and Daimler?" }, { "from": "gpt", "value": "The answer is Mack trucks have a distinctive grille and robust cab design" } ], "clip_score": 28.484375, "loss": 3.2460074424743652 }, { "id": "identity_278538", "image": "ScienceQA/images/175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the correct chemical formula for this molecule from the given options. \nOptions:\n(A) H2F\n(B) H2F2\n(C) HF\n(D) HF2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.0078125, "loss": 3.1170690059661865 }, { "id": "identity_327914", "image": "TQA/images/question_images/atomic_structure_9023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many protons are present in this atom?\nChoices:\n(A) 12\n(B) 7\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.75, "loss": 2.542623519897461 }, { "id": "identity_187703", "image": "A-OKVQA/images/386332.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a fan in the room?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 12.0, "loss": 3.2444908618927 }, { "id": "identity_318910", "image": "FigureQA/images/75983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this color have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.0, "loss": 4.319976806640625 }, { "id": "identity_19661", "image": "MapQA/images/map_10839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Oklahoma ?" }, { "from": "gpt", "value": "The answer is 6.8%-8.1%" } ], "clip_score": 26.140625, "loss": 2.0628626346588135 }, { "id": "identity_58324", "image": "DVQA/images/bar_train_00190816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the lowest accuracy?" }, { "from": "gpt", "value": "The answer is check" } ], "clip_score": 31.671875, "loss": 3.590432643890381 }, { "id": "identity_59753", "image": "FigureQA/images/59661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of the Orange Red section in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 45-50%" } ], "clip_score": 37.03125, "loss": 1.8677526712417603 }, { "id": "identity_40309", "image": "MapQA/images/map_8051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color code representing the number of apartments in Texas?" }, { "from": "gpt", "value": "The answer is 3726540-4623997" } ], "clip_score": 31.34375, "loss": 2.657850742340088 }, { "id": "identity_90615", "image": "GeoQA+/images/1477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, D, E, and F are the midpoints of sides BC, AC, and AB respectively, and AH is perpendicular to BC at H. If BC = 16, what is the length of segment EF?" }, { "from": "gpt", "value": "The answer is EF = 8" } ], "clip_score": 34.59375, "loss": 2.0318355560302734 }, { "id": "identity_322231", "image": "VQA2.0/images/269997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this person sitting on?" }, { "from": "gpt", "value": "The answer is bench" } ], "clip_score": 24.875, "loss": 2.446202039718628 }, { "id": "identity_155773", "image": "ScienceQA/images/8489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average final temperature of the two vials?" }, { "from": "gpt", "value": "The answer is 4.05°C" } ], "clip_score": 33.09375, "loss": 1.885810136795044 }, { "id": "identity_334845", "image": "Geometry3K/images/627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 3.\nChoices:\n(A) 37.5\n(B) 52.5\n(C) 105\n(D) 133" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.28125, "loss": 3.2865688800811768 }, { "id": "identity_337105", "image": "MapQA/images/map_11526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any data missing on the map?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.3125, "loss": 2.5888822078704834 }, { "id": "identity_193589", "image": "PMC-VQA/images/PMC6353974_Fig3_429551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging is shown in the picture?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 29.96875, "loss": 2.957674026489258 }, { "id": "identity_31354", "image": "A-OKVQA/images/535692.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where does surfing come from?\nChoices:\n(A) mexico\n(B) hawaii\n(C) polynesia\n(D) argentina" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.5, "loss": 4.025493621826172 }, { "id": "identity_145833", "image": "DocVQA/images/qqmf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the report?" }, { "from": "gpt", "value": "The answer is Edward R. Ricciuti" } ], "clip_score": 21.625, "loss": 3.3620941638946533 }, { "id": "identity_280700", "image": "A-OKVQA/images/559288.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the sign on top of the truck?\nChoices:\n(A) original part\n(B) vandalism\n(C) keep score\n(D) advertising" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.359375, "loss": 4.3134989738464355 }, { "id": "identity_109777", "image": "TextVQA/images/1003153971ee6833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main object in the image?" }, { "from": "gpt", "value": "The answer is A coin" } ], "clip_score": 27.859375, "loss": 4.107921123504639 }, { "id": "identity_146907", "image": "DocVQA/images/ysbd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum allowance for travel not to exceed?" }, { "from": "gpt", "value": "The answer is Jet economy class fare" } ], "clip_score": 14.203125, "loss": 5.069340705871582 }, { "id": "identity_3530", "image": "FigureQA/images/5424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Orchid less than Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.71875, "loss": 3.9711098670959473 }, { "id": "identity_232385", "image": "ScienceQA/images/2304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary components of an eggshell, and how do they differ from the components of a typical mineral?" }, { "from": "gpt", "value": "The answer is Calcium carbonate, proteins, and organic materials; minerals are inorganic" } ], "clip_score": 32.03125, "loss": 2.368422746658325 }, { "id": "identity_117321", "image": "TextVQA/images/4065f16e680434d2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the jersey worn by the player with the basketball?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.46875, "loss": 3.5533175468444824 }, { "id": "identity_63509", "image": "AI2D/images/3260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"nucleus\"?" }, { "from": "gpt", "value": "The answer is Control center of the cell, contains DNA" } ], "clip_score": 25.140625, "loss": 2.4281985759735107 }, { "id": "identity_162874", "image": "Geometry3K/images/2025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diameter of the circle?" }, { "from": "gpt", "value": "The answer is 2√2 * r (where r is the radius)" } ], "clip_score": 33.375, "loss": 2.2254819869995117 }, { "id": "identity_299666", "image": "TextVQA/images/3492f2b6c34e80cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the character depicted on the left side of Duffy?" }, { "from": "gpt", "value": "The answer is roz" } ], "clip_score": 21.734375, "loss": 4.943647861480713 }, { "id": "identity_194188", "image": "A-OKVQA/images/100234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Sunny day" } ], "clip_score": 15.046875, "loss": 3.0664889812469482 }, { "id": "identity_21821", "image": "PMC-VQA/images/PMC3395576_F4_144858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the compound glands? \nChoices:\n(A) Cytoplasm of mucosal epithelium \n(B) Cytoplasm of columnar cells \n(C) Cytoplasm of both \n(D) None of the above " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.765625, "loss": 4.252277374267578 }, { "id": "identity_297835", "image": "IconQA/images/13117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangles present on the frame?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 31.671875, "loss": 2.1646792888641357 }, { "id": "identity_296822", "image": "TextVQA/images/fde64965d8a69a77.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the message that this poster is attempting to convey?" }, { "from": "gpt", "value": "The answer is sluit je aan bij het kunstenaarsverzet" } ], "clip_score": 31.59375, "loss": 1.7170642614364624 }, { "id": "identity_91012", "image": "UniGeo/images/2340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment CE?" }, { "from": "gpt", "value": "The answer is √5 or approximately 2.24" } ], "clip_score": 32.34375, "loss": 2.0353174209594727 }, { "id": "identity_218537", "image": "A-OKVQA/images/558018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary activity being performed in the image?" }, { "from": "gpt", "value": "The answer is Playing baseball or softball" } ], "clip_score": 21.015625, "loss": 2.916640043258667 }, { "id": "identity_94844", "image": "IconQA/images/47241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the triangle and circle were to overlap, how many shapes would be visible?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 32.625, "loss": 2.0902011394500732 }, { "id": "identity_217260", "image": "PMC-VQA/images/PMC7181419_Fig9_158378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is highlighted by the dotted circle in the image?" }, { "from": "gpt", "value": "The answer is Bone lesion" } ], "clip_score": 27.640625, "loss": 3.778254270553589 }, { "id": "identity_231662", "image": "TabMWP/images/26862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of apples eaten by all clients last week?" }, { "from": "gpt", "value": "The answer is 98" } ], "clip_score": 34.25, "loss": 2.585101842880249 }, { "id": "identity_239466", "image": "FigureQA/images/37016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Medium Purple and Bubblegum?" }, { "from": "gpt", "value": "The answer is 33.34%" } ], "clip_score": 31.796875, "loss": 1.690341830253601 }, { "id": "identity_64457", "image": "ScienceQA/images/10485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state that is directly west of Virginia?" }, { "from": "gpt", "value": "The answer is Charleston" } ], "clip_score": 25.71875, "loss": 2.2586419582366943 }, { "id": "identity_196580", "image": "PMC-VQA/images/PMC6423182_Fig19_453342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we determine the gender of the patient from this ultrasound image?" }, { "from": "gpt", "value": "The answer is Yes, female" } ], "clip_score": 35.90625, "loss": 3.751366376876831 }, { "id": "identity_178081", "image": "IconQA/images/67959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two triangles are added to the second row, how many triangles will be in the second row?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.59375, "loss": 2.318067789077759 }, { "id": "identity_99216", "image": "TabMWP/images/20289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of guests the inn hosted over these six days?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 35.78125, "loss": 2.1500487327575684 }, { "id": "identity_175540", "image": "IconQA/images/11349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does this shape have?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.1875, "loss": 2.4639737606048584 }, { "id": "identity_72999", "image": "ScienceQA/images/3850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the nails in the image are rusty?" }, { "from": "gpt", "value": "The answer is 96% of the nails are rusty" } ], "clip_score": 27.640625, "loss": 1.6186484098434448 }, { "id": "identity_279136", "image": "TQA/images/question_images/lewis_dot_diagrams_9142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number of valence electrons in aluminum?\nChoices:\n(A) 2\n(B) 0\n(C) 3\n(D) 1" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.515625, "loss": 3.6535308361053467 }, { "id": "identity_187438", "image": "PMC-VQA/images/PMC7286008_acm212856-fig-0003_189257.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum dose of Gy indicated on the color scale of the image?" }, { "from": "gpt", "value": "The answer is 105 Gy" } ], "clip_score": 20.953125, "loss": 2.44926118850708 }, { "id": "identity_303193", "image": "DVQA/images/bar_train_00018081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value?" }, { "from": "gpt", "value": "The answer is drunk" } ], "clip_score": 20.5625, "loss": 3.3142786026000977 }, { "id": "identity_254494", "image": "VQA2.0/images/65894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are in the image, and how many people are visible on each boat?" }, { "from": "gpt", "value": "The answer is Two boats; the number of people on each boat is not clearly visible" } ], "clip_score": 24.765625, "loss": 2.5278878211975098 }, { "id": "identity_327037", "image": "DVQA/images/bar_train_00076995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars has the smallest valued individual bar in the entire chart?" }, { "from": "gpt", "value": "The answer is stroke" } ], "clip_score": 25.96875, "loss": 4.114803791046143 }, { "id": "identity_153923", "image": "ScienceQA/images/17263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Chad and Warren were to combine their lists, how many food items would they have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.46875, "loss": 2.2882375717163086 }, { "id": "identity_236017", "image": "GeoQA+/images/5602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,四边形ABCD是⊙O的内接四边形,若∠BOD=88°, ∠AOB=72°, 则∠BCD的度数是()" }, { "from": "gpt", "value": "The answer is 80°" } ], "clip_score": 31.703125, "loss": 1.9345329999923706 }, { "id": "identity_113068", "image": "VQA-RAD/images/synpic54004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is used to capture this image?" }, { "from": "gpt", "value": "The answer is MRI scan" } ], "clip_score": 34.5625, "loss": 2.7529232501983643 }, { "id": "identity_318080", "image": "ScienceQA/images/3304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which creature's feet are also adapted for sticking to smooth surfaces?\nChoices:\n(A) monitor lizard\n(B) Costa Rica brook frog" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.9375, "loss": 4.047082901000977 }, { "id": "identity_247699", "image": "TextVQA/images/804c5ccaa4631670.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the main elements depicted in the art piece and how do they contribute to the overall theme of the battle between Mozilla and King Corporate?" }, { "from": "gpt", "value": "The answer is Mozilla, King Corporate, cityscape, browser icons, open-source vs corporate" } ], "clip_score": 25.765625, "loss": 3.1745107173919678 }, { "id": "identity_320256", "image": "VQA2.0/images/448560.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the object pointed up or down?" }, { "from": "gpt", "value": "The answer is up" } ], "clip_score": 11.8828125, "loss": 3.3960087299346924 }, { "id": "identity_129260", "image": "A-OKVQA/images/327807.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the train in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.15625, "loss": 3.46126127243042 }, { "id": "identity_27397", "image": "A-OKVQA/images/502409.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the woman's skirt who is number 851 in this ski race?\nChoices:\n(A) blue\n(B) orange\n(C) red\n(D) pink" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.28125, "loss": 3.762622117996216 }, { "id": "identity_63112", "image": "TQA/images/question_images/human_system_digestive_3783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final part of the digestive system before the opening to the outside of the body?" }, { "from": "gpt", "value": "The answer is Rectum" } ], "clip_score": 33.65625, "loss": 2.6334481239318848 }, { "id": "identity_115303", "image": "VQA2.0/images/187435.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the kite directly above the child's hand?" }, { "from": "gpt", "value": "The answer is American flag" } ], "clip_score": 32.34375, "loss": 2.7773303985595703 }, { "id": "identity_229899", "image": "GeoQA+/images/4228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,在Rt△ABC中,∠A=30°,BC=3,D、E分别是直角边BC、AC的中点,求△ADE的面积。" }, { "from": "gpt", "value": "The answer is 0.375√3" } ], "clip_score": 27.90625, "loss": 2.3516478538513184 }, { "id": "identity_65012", "image": "AI2D/images/421.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many organisms in the food web are prey for the Seal?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.53125, "loss": 2.2954933643341064 }, { "id": "identity_251919", "image": "A-OKVQA/images/4834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the possible economic benefits for the person in the image from selling the logs, considering the context of the image?" }, { "from": "gpt", "value": "The answer is Financial income, business investment, community economic support" } ], "clip_score": 25.4375, "loss": 4.249303340911865 }, { "id": "identity_81112", "image": "Geometry3K/images/347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment QR?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 26.21875, "loss": 2.504220724105835 }, { "id": "identity_133302", "image": "VizWiz/images/VizWiz_train_00000973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the surface on which the object is placed?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 21.3125, "loss": 3.453997850418091 }, { "id": "identity_43979", "image": "MapQA/images/map_3106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value greater than 30?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.40625, "loss": 2.3861165046691895 }, { "id": "identity_84855", "image": "GeoQA+/images/5783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠A = 50° and ∠ABC = 65°, what is the measure of ∠BAC?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 30.96875, "loss": 1.6415183544158936 }, { "id": "identity_172319", "image": "IconQA/images/20349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows of flowers are there?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.421875, "loss": 2.4945826530456543 }, { "id": "identity_127966", "image": "A-OKVQA/images/392949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trees are lining the street?" }, { "from": "gpt", "value": "The answer is Deciduous trees" } ], "clip_score": 25.203125, "loss": 2.4768948554992676 }, { "id": "identity_77497", "image": "ScienceQA/images/16569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Two (parasitic jaeger and rough-legged hawk)" } ], "clip_score": 33.28125, "loss": 1.863122582435608 }, { "id": "identity_74766", "image": "AI2D/images/2940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which structure in the mouth is responsible for producing saliva?" }, { "from": "gpt", "value": "The answer is Salivary glands (not labeled)" } ], "clip_score": 31.6875, "loss": 1.7095763683319092 }, { "id": "identity_69178", "image": "TQA/images/abc_question_images/photosynthesis_17478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter 'R' symbolize in this image?" }, { "from": "gpt", "value": "The answer is Waste or Decomposed organic matter" } ], "clip_score": 28.8125, "loss": 3.048872470855713 }, { "id": "identity_219594", "image": "PMC-VQA/images/PMC4095734_fig8_305820.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is indicated by the abbreviation 'LLR'?" }, { "from": "gpt", "value": "The answer is Left Lateral Ridge" } ], "clip_score": 23.671875, "loss": 2.4162182807922363 }, { "id": "identity_330624", "image": "A-OKVQA/images/312182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the people doing?\nChoices:\n(A) singing\n(B) marching\n(C) swimming\n(D) protesting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.59375, "loss": 4.099231719970703 }, { "id": "identity_207758", "image": "TextVQA/images/16ce707e52152307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main theme or message conveyed by the event as per the poster?" }, { "from": "gpt", "value": "The answer is Somos Semillas" } ], "clip_score": 33.21875, "loss": 3.04207181930542 }, { "id": "identity_67772", "image": "ScienceQA/images/13013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red chairs are there in the lunchroom?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 25.84375, "loss": 2.051642894744873 }, { "id": "identity_234518", "image": "TextVQA/images/e9a38a77f3e7801f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average temperature for Annapolis from Thursday to Sunday?" }, { "from": "gpt", "value": "The answer is 35" } ], "clip_score": 14.4296875, "loss": 2.648719072341919 }, { "id": "identity_250355", "image": "DocVQA/images/qrwl0079_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cost per complete for the CAMEL Filter 85 Prototype, and how much more is the labor cost compared to the WATS cost?" }, { "from": "gpt", "value": "The answer is Average cost per complete is $3.39, labor cost is $3,200 more" } ], "clip_score": 24.265625, "loss": 2.019392251968384 }, { "id": "identity_107024", "image": "VQA2.0/images/353907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any person wearing a helmet in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.734375, "loss": 3.2491295337677 }, { "id": "identity_299767", "image": "DocVQA/images/lfwv0054_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide me with the phone number for Doubletree hotel?" }, { "from": "gpt", "value": "The answer is 615-244-8200" } ], "clip_score": 21.890625, "loss": 1.4855326414108276 }, { "id": "identity_107097", "image": "VQA2.0/images/156768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible runway lights are there?" }, { "from": "gpt", "value": "The answer is Count the number of lights" } ], "clip_score": 21.9375, "loss": 3.5450353622436523 }, { "id": "identity_88016", "image": "GeoQA+/images/5334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given image, if AO is perpendicular to CD at point E and ∠GBC = 48°, what is the measure of ∠AED?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 34.65625, "loss": 1.7261143922805786 }, { "id": "identity_241782", "image": "GeoQA+/images/1297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB为⊙O的直径,∠BED=20°,若∠ACD的度数为70°,求∠CED的度数。" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 31.359375, "loss": 1.8212683200836182 }, { "id": "identity_261227", "image": "TabMWP/images/25049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of pets owned by the students, and how does it compare to the median number of pets?" }, { "from": "gpt", "value": "The answer is Average ≈ 6.43, Median = 7" } ], "clip_score": 33.46875, "loss": 2.6483869552612305 }, { "id": "identity_71934", "image": "ScienceQA/images/12666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What item is common in both Ken and Candice's list of wants?" }, { "from": "gpt", "value": "The answer is water" } ], "clip_score": 33.0, "loss": 3.380500316619873 }, { "id": "identity_233962", "image": "ScienceQA/images/12391.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What items can Erin and Harper trade so that Erin gets all the items she wants and Harper gets all the items she wants?" }, { "from": "gpt", "value": "The answer is Erin trades tomatoes for Harper's broccoli" } ], "clip_score": 27.640625, "loss": 2.034271240234375 }, { "id": "identity_302093", "image": "UniGeo/images/2623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given a quadrilateral with four sides, squares are constructed on the outside of each side as shown in the image. Connecting points E, F, G, H, I, J, K, and L, if the area of the quadrilateral is 5, what is the total area of the shaded triangles?\n\nChoices:\n(A) 5\n(B) 10\n(C) 15\n(D) 20" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 37.90625, "loss": 2.521371603012085 }, { "id": "identity_248716", "image": "A-OKVQA/images/216278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a person is sitting on the top deck of the bus and wants to take the metro, how many steps would they need to take to reach the metro entrance, assuming each step is 0.3 meters and the distance from the bus stop to the metro entrance is 15 meters?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 26.234375, "loss": 2.6744611263275146 }, { "id": "identity_12044", "image": "AI2D/images/3661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by j?\nChoices:\n(A) veins\n(B) left ventricle\n(C) aorta\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.6875, "loss": 4.355187892913818 }, { "id": "identity_18263", "image": "VQA2.0/images/246880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this picture look like it's decorated for a certain holiday?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 24.859375, "loss": 2.973123550415039 }, { "id": "identity_9043", "image": "Geometry3K/images/950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A E is a tangent. If A D = 12 and F E = 18, how long is A E to the nearest tenth unit?\nChoices:\n(A) 27.5\n(B) 55\n(C) 60\n(D) 90" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.859375, "loss": 2.472367763519287 }, { "id": "identity_207011", "image": "PMC-VQA/images/PMC5804616_Fig1_273264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the radiological appearance of the lesion indicated by the arrow?" }, { "from": "gpt", "value": "The answer is The lesion has a central necrotic or cavitary appearance" } ], "clip_score": 29.625, "loss": 2.3112967014312744 }, { "id": "identity_122905", "image": "VQA2.0/images/5699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible on the field?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 20.1875, "loss": 2.2791519165039062 }, { "id": "identity_214671", "image": "TextVQA/images/2d407690e9cbd2df.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more performers holding green-red or blue-yellow umbrellas?" }, { "from": "gpt", "value": "The answer is Blue-yellow" } ], "clip_score": 34.65625, "loss": 2.2435662746429443 }, { "id": "identity_320439", "image": "IconQA/images/49363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Fred is walking the dog one morning. His watch shows something. What time is it?\nChoices:\n(A) 9:00 P.M.\n(B) 9:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.96875, "loss": 3.744054079055786 }, { "id": "identity_210089", "image": "TextVQA/images/c8d8d0535bcc1b1e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tentative dates are listed for the event YOUTH'11?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 19.84375, "loss": 2.16951322555542 }, { "id": "identity_210053", "image": "A-OKVQA/images/166067.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the water?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 13.7578125, "loss": 3.489896297454834 }, { "id": "identity_289861", "image": "PMC-VQA/images/PMC7572012_F1_278008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the lung is the mass located?\nChoices:\n(A) Upper lobe of the left lung\n(B) Lower lobe of the left lung\n(C) Upper lobe of the right lung\n(D) Lower lobe of the right lung" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.453125, "loss": 3.764343738555908 }, { "id": "identity_195543", "image": "PMC-VQA/images/PMC3200069_fig1_112904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the foreign objects aligned vertically or horizontally in the X-ray?" }, { "from": "gpt", "value": "The answer is Vertically" } ], "clip_score": 23.78125, "loss": 2.7914905548095703 }, { "id": "identity_163441", "image": "Geometry3K/images/984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the minor arc FH?" }, { "from": "gpt", "value": "The answer is 133 degrees" } ], "clip_score": 33.125, "loss": 1.7962758541107178 }, { "id": "identity_232582", "image": "PMC-VQA/images/PMC8067020_cells-10-00769-f006_424748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the distribution of G-actin in the cytoplasm between the scr and sh-Tp94 (cl 2) samples. What does this indicate about the effect of sh-Tp94 (cl 2) on G-actin distribution?" }, { "from": "gpt", "value": "The answer is sh-Tp94 (cl 2) increases G-actin aggregation in the cytoplasm" } ], "clip_score": 22.125, "loss": 1.4280892610549927 }, { "id": "identity_185705", "image": "PMC-VQA/images/PMC3970346_fig1_277589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the glioma in the MRI image show signs of edema surrounding it?" }, { "from": "gpt", "value": "The answer is Yes, there is edema surrounding the glioma" } ], "clip_score": 33.65625, "loss": 1.8711001873016357 }, { "id": "identity_105748", "image": "PMC-VQA/images/PMC7027275_Fig5_120050.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar measurement at the bottom of the image?" }, { "from": "gpt", "value": "The answer is 1 µm" } ], "clip_score": 21.859375, "loss": 2.2305541038513184 }, { "id": "identity_319563", "image": "TQA/images/question_images/earth_day_night_2639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the earth has rotated to a position where it is summer in the northern hemisphere, where does the vertical ray hit?\nChoices:\n(A) It would hit the western side of the earth\n(B) The vertical ray would not be important\n(C) it would be on the same side\n(D) The vertical ray would be on the north pole" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.390625, "loss": 3.7524051666259766 }, { "id": "identity_331306", "image": "TabMWP/images/4722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each train has 2 cars. How many cars are on some trains?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 30.453125, "loss": 2.0853323936462402 }, { "id": "identity_54839", "image": "DocVQA/images/jfkf0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of request is mentioned in the overview?" }, { "from": "gpt", "value": "The answer is Partial clinical hold removal request" } ], "clip_score": 32.84375, "loss": 2.1384406089782715 }, { "id": "identity_265780", "image": "UniGeo/images/128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, a rectangular piece of paper ABCD is folded along EF, with points D and C landing at positions D' and C' respectively. If ∠1 = 56°, what is the degree measure of ∠EFB?\nChoices:\n(A) 56°\n(B) 62°\n(C) 68°\n(D) 124°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.90625, "loss": 2.3390259742736816 }, { "id": "identity_60308", "image": "DVQA/images/bar_train_00125809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest value for the 'gate' category?" }, { "from": "gpt", "value": "The answer is novel" } ], "clip_score": 27.015625, "loss": 3.495404005050659 }, { "id": "identity_212116", "image": "VQA2.0/images/576413.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the landscape in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 19.234375, "loss": 3.1751134395599365 }, { "id": "identity_74792", "image": "ScienceQA/images/18705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the body of water located north of the highlighted country?" }, { "from": "gpt", "value": "The answer is Bismarck Sea" } ], "clip_score": 25.6875, "loss": 2.6662726402282715 }, { "id": "identity_84977", "image": "UniGeo/images/1616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given the lines a, b, and c are parallel, and the ratio of AB to BC is 2:3, if the length of BC is 18, what is the length of segment AC?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 31.984375, "loss": 2.137624502182007 }, { "id": "identity_110083", "image": "PMC-VQA/images/PMC4911964_NCV556F5_42411.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the diaphragm appear to be at a normal height in this image?" }, { "from": "gpt", "value": "The answer is Diaphragm at normal height" } ], "clip_score": 29.5625, "loss": 2.749753475189209 }, { "id": "identity_317170", "image": "PMC-VQA/images/PMC6720874_sensors-19-03522-f014_31921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the accelerometer is depicted in the image?\nChoices:\n(A) An etched wafer\n(B) A wafer that was not etched\n(C) The packaging of the accelerometer\n(D) A circuit board used to read the accelerometer" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.375, "loss": 4.8566813468933105 }, { "id": "identity_310639", "image": "CLEVR-Math/images/CLEVR_train_036539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all yellow cubes. Remove all brown spheres. How many cubes remain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.125, "loss": 2.483929395675659 }, { "id": "identity_64449", "image": "ScienceQA/images/10295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of snack is shown on the right side of the image?" }, { "from": "gpt", "value": "The answer is Potato chips" } ], "clip_score": 33.96875, "loss": 2.367929458618164 }, { "id": "identity_95761", "image": "IconQA/images/44175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the watch is 10 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 1:50" } ], "clip_score": 26.625, "loss": 1.6619662046432495 }, { "id": "identity_178983", "image": "IconQA/images/85064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 3 green marbles, how many marbles will be left?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 32.1875, "loss": 2.2655067443847656 }, { "id": "identity_137666", "image": "FigureQA/images/26154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color bar represents the highest value on the graph?" }, { "from": "gpt", "value": "The answer is Medium Turquoise" } ], "clip_score": 31.984375, "loss": 1.7541778087615967 }, { "id": "identity_240992", "image": "DocVQA/images/tkyg0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate difference in calories per day between the \"All foods\" and \"Grain products\" curves in the year 1950?" }, { "from": "gpt", "value": "The answer is 2,500 calories per day" } ], "clip_score": 24.6875, "loss": 1.5232162475585938 }, { "id": "identity_116003", "image": "TextVQA/images/0a247f3022c40cf1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing green jerseys?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.65625, "loss": 4.182275295257568 }, { "id": "identity_76726", "image": "TQA/images/question_images/human_system_circulatory_3661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as Pulmonary Valve?" }, { "from": "gpt", "value": "The answer is Controls blood flow to pulmonary artery" } ], "clip_score": 33.125, "loss": 2.293123960494995 }, { "id": "identity_276202", "image": "DocVQA/images/ljkm0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What information is included in section B?" }, { "from": "gpt", "value": "The answer is Nutrients and nutrition terms," } ], "clip_score": 18.671875, "loss": 2.84572172164917 }, { "id": "identity_294847", "image": "A-OKVQA/images/250802.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activities are permissible for this individual to engage in while in the state of California?\nChoices:\n(A) practice medicine\n(B) teach\n(C) fish\n(D) drive" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.5546875, "loss": 4.155457496643066 }, { "id": "identity_315389", "image": "A-OKVQA/images/60599.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What in the foreground is commonly used as a food source?\nChoices:\n(A) brown animal\n(B) black animal\n(C) fence material\n(D) ground material" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.015625, "loss": 3.958493709564209 }, { "id": "identity_230538", "image": "DVQA/images/bar_train_00173544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value represented by all the bars combined?" }, { "from": "gpt", "value": "The answer is 210" } ], "clip_score": 25.515625, "loss": 2.271897554397583 }, { "id": "identity_246909", "image": "IconQA/images/64325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each plate originally had 8 apple slices and 3 slices were eaten from each plate, how many apple slices are left in total?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 28.625, "loss": 2.7087156772613525 }, { "id": "identity_274402", "image": "VQA-AS/images/15200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it possible for him to fall?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.75, "loss": 3.499253749847412 }, { "id": "identity_71737", "image": "AI2D/images/3361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the flower is responsible for supporting the anther?" }, { "from": "gpt", "value": "The answer is Filament" } ], "clip_score": 31.828125, "loss": 2.802849054336548 }, { "id": "identity_273725", "image": "DVQA/images/bar_train_00089586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Across all categories, which object is the most preferred by the highest number of people?" }, { "from": "gpt", "value": "The answer is ranch" } ], "clip_score": 27.28125, "loss": 2.8467330932617188 }, { "id": "identity_97662", "image": "IconQA/images/53849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does the shape have?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.671875, "loss": 2.7072651386260986 }, { "id": "identity_217853", "image": "PMC-VQA/images/PMC4141336_Fig10_314822.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 29.234375, "loss": 2.5198974609375 }, { "id": "identity_146638", "image": "FigureQA/images/47091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of segments represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.78125, "loss": 2.1463382244110107 }, { "id": "identity_244152", "image": "VQA2.0/images/405695.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of items are advertised in the store window, and what are they?" }, { "from": "gpt", "value": "The answer is six items: Riding Crops & Whips, Irish Blackthorns, Malacca Canes, Life Preservers, Dagger Canes, Swordsticks" } ], "clip_score": 34.03125, "loss": 1.3701800107955933 }, { "id": "identity_331791", "image": "TextVQA/images/9aff1f3584844e37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many axles?" }, { "from": "gpt", "value": "The answer is aviva stadium" } ], "clip_score": 11.7578125, "loss": 5.728671073913574 }, { "id": "identity_242448", "image": "IconQA/images/39203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each paper clip is 3 cm long, what is the total length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 29.859375, "loss": 2.0576205253601074 }, { "id": "identity_139177", "image": "DocVQA/images/fkbw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Around what year does the photo on the lower part of the cover show a beet dump at Berthoud?" }, { "from": "gpt", "value": "The answer is 1905" } ], "clip_score": 20.4375, "loss": 1.6689881086349487 }, { "id": "identity_336750", "image": "TextVQA/images/02250d9442586089.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of this screen?" }, { "from": "gpt", "value": "The answer is benq" } ], "clip_score": 27.09375, "loss": 3.7964770793914795 }, { "id": "identity_140735", "image": "ChartQA/images/14929682007417.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Economic Freedom Score of Zambia?" }, { "from": "gpt", "value": "The answer is 6.91" } ], "clip_score": 33.15625, "loss": 1.9706865549087524 }, { "id": "identity_143683", "image": "PlotQA/images/122776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country has the highest increase in rural population percentage from 1960 to 1962?" }, { "from": "gpt", "value": "The answer is Thailand" } ], "clip_score": 29.40625, "loss": 2.7793807983398438 }, { "id": "identity_94432", "image": "TabMWP/images/34696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Akira buys 2 kilograms of blueberry bars and 1 kilogram of lemon bars, how much will she spend in total?" }, { "from": "gpt", "value": "The answer is $4" } ], "clip_score": 34.5, "loss": 2.870953321456909 }, { "id": "identity_22026", "image": "A-OKVQA/images/181084.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the police monitoring?\nChoices:\n(A) accident\n(B) balloon sale\n(C) riot\n(D) parade" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.40625, "loss": 4.195700645446777 }, { "id": "identity_313974", "image": "MapQA/images/map_388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have the highest value in this region?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.71875, "loss": 2.6974124908447266 }, { "id": "identity_241871", "image": "TextVQA/images/87f1ab9a38f1e67f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boxes labeled \"Woodford Reserve\" are visible in the image, and what is the total number of bottles if each box contains 12 bottles?" }, { "from": "gpt", "value": "The answer is 60 bottles" } ], "clip_score": 18.65625, "loss": 2.5794007778167725 }, { "id": "identity_11186", "image": "VizWiz/images/VizWiz_train_00006416.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on this page?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 11.109375, "loss": 4.1228203773498535 }, { "id": "identity_297346", "image": "PMC-VQA/images/PMC5775587_Fig2_266137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What abnormality is observed in the MRI scan based on the following choices?\nChoices:\n(A) Hypointense signal in DWI and ADC \n(B) Hyperintense signal in DWI and hypointense signal in ADC \n(C) Normal MRI \n(D) Hyperintense signal in ADC and normal DWI" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.484375, "loss": 3.4918224811553955 }, { "id": "identity_96012", "image": "TabMWP/images/15075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the most expensive and least expensive item?" }, { "from": "gpt", "value": "The answer is $0.74" } ], "clip_score": 19.703125, "loss": 2.127392053604126 }, { "id": "identity_296808", "image": "PMC-VQA/images/PMC4455327_Fig4_393480.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this study, what is the orientation of the MR images?\nOptions:\n(A) Horizontal plane\n(B) Sagittal plane\n(C) Oblique plane\n(D) Vertical plane" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.15625, "loss": 4.839847087860107 }, { "id": "identity_254239", "image": "ChartQA/images/7065.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between which two consecutive years did the response rate experience the largest percentage decrease, and what was the percentage decrease?" }, { "from": "gpt", "value": "The answer is 2009 to 2012, 40.00%" } ], "clip_score": 28.234375, "loss": 1.677181363105774 }, { "id": "identity_623", "image": "ScienceQA/images/5011.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms contains matter that was once part of the persimmon tree?\nChoices:\n(A) swallowtail caterpillar\n(B) beaver" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.09375, "loss": 4.595068454742432 }, { "id": "identity_187349", "image": "A-OKVQA/images/69189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many chickens are visible on the bicycle?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 28.734375, "loss": 2.2519371509552 }, { "id": "identity_19976", "image": "VQA2.0/images/34469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the animals laying on?" }, { "from": "gpt", "value": "The answer is dirt" } ], "clip_score": 23.5625, "loss": 2.7492525577545166 }, { "id": "identity_269794", "image": "FigureQA/images/60105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Turquoise the curve with the smallest area underneath it?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.171875, "loss": 3.9804420471191406 }, { "id": "identity_263097", "image": "DocVQA/images/kshk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason for making the provisional reservations?" }, { "from": "gpt", "value": "The answer is FOR GREAT WESTERN UNITED CORPORATION FOR THE EVENING OF JANUARY 30, 1969." } ], "clip_score": 23.359375, "loss": 1.263357400894165 }, { "id": "identity_194828", "image": "PMC-VQA/images/PMC5101592_fig-8_93353.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are labeled with the letters 'LN' and 'HK' in the image?" }, { "from": "gpt", "value": "The answer is Lymph Node and Kidney" } ], "clip_score": 21.46875, "loss": 2.341632604598999 }, { "id": "identity_197082", "image": "TextVQA/images/73f8e2adf464801e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the players' uniforms?" }, { "from": "gpt", "value": "The answer is Camouflage" } ], "clip_score": 19.046875, "loss": 2.4570443630218506 }, { "id": "identity_108504", "image": "VQA-AS/images/18637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any fish visible in the ponds?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.078125, "loss": 3.028688907623291 }, { "id": "identity_23292", "image": "FigureQA/images/50278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.046875, "loss": 4.038866996765137 }, { "id": "identity_254328", "image": "TabMWP/images/27346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Colton need to buy two brass clips, three four-inch nails, and one gold tack?" }, { "from": "gpt", "value": "The answer is 0.29" } ], "clip_score": 33.53125, "loss": 2.1068413257598877 }, { "id": "identity_26062", "image": "VQA2.0/images/423562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this in a third world country?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.6953125, "loss": 3.1179511547088623 }, { "id": "identity_36834", "image": "PMC-VQA/images/PMC5965189_f1-cpcem-01-268_318156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is causing the casting shadows from the endometrium in the image?\nChoices:\n(A) Air in the uterus \n(B) Normal tissue \n(C) Irregular echogenic endometrial debris \n(D) Cancer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.671875, "loss": 3.983907699584961 }, { "id": "identity_197547", "image": "VQA2.0/images/506315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing red tops in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.9375, "loss": 4.504920959472656 }, { "id": "identity_305009", "image": "TextVQA/images/59863e32a6f4833e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the red writing here?" }, { "from": "gpt", "value": "The answer is vela" } ], "clip_score": 21.40625, "loss": 2.8706865310668945 }, { "id": "identity_238062", "image": "Geometry3K/images/1241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Quadrilateral ABCD is a rectangle. Given m∠2 = 40°, find the measure of angle ∠5" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 30.703125, "loss": 1.5647532939910889 }, { "id": "identity_146709", "image": "DocVQA/images/hncc0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company that filed the notice?" }, { "from": "gpt", "value": "The answer is CPC International Inc" } ], "clip_score": 19.921875, "loss": 4.050686359405518 }, { "id": "identity_138007", "image": "DVQA/images/bar_train_00083013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the \"year\" category across all labels?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 20.625, "loss": 2.280433416366577 }, { "id": "identity_100609", "image": "TabMWP/images/10513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of broken plates per shipment?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 34.03125, "loss": 3.1525135040283203 }, { "id": "identity_223560", "image": "TextVQA/images/e762b229f7140d59.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of merchandise is predominantly displayed in the store?" }, { "from": "gpt", "value": "The answer is Music-related merchandise" } ], "clip_score": 25.03125, "loss": 2.3256757259368896 }, { "id": "identity_292242", "image": "PMC-VQA/images/PMC6891027_jgh312213-fig-0001_78336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which lobe of the liver is the abnormality observed?\nChoices:\n(A) Right lobe\n(B) Left lobe\n(C) Caudate lobe\n(D) Quadrate lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.03125, "loss": 4.48742151260376 }, { "id": "identity_253133", "image": "AI2D/images/1766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to the food web, what is the highest trophic level that the rat can reach through its predators?" }, { "from": "gpt", "value": "The answer is snakes" } ], "clip_score": 33.65625, "loss": 2.9814844131469727 }, { "id": "identity_293786", "image": "GeoQA+/images/3830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, PA and PB are tangents to circle O, with points of tangency A and B respectively. If ∠APB = 50° and point C is on circle O, what is the measure of angle ACB?\nChoices:\n(A) 50°\n(B) 55°\n(C) 60°\n(D) 65°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.0625, "loss": 2.287403106689453 }, { "id": "identity_159533", "image": "UniGeo/images/2639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment AD in the parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.625, "loss": 2.4512031078338623 }, { "id": "identity_237816", "image": "FigureQA/images/77551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Rank the curves from the maximum to the minimum area under the curve" }, { "from": "gpt", "value": "The answer is Dark Gold, Turquoise, Medium Seafoam, Firebrick, Purple" } ], "clip_score": 33.65625, "loss": 1.0099998712539673 }, { "id": "identity_255736", "image": "DVQA/images/bar_train_00050305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the 'damage' label across all groups?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 25.625, "loss": 2.5344207286834717 }, { "id": "identity_250648", "image": "A-OKVQA/images/185334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What elements on the bikes indicate that they are being used for showing off, and how do these elements contribute to the overall appearance?" }, { "from": "gpt", "value": "The answer is Decorative items and modifications enhance the bikes' visual appeal" } ], "clip_score": 28.578125, "loss": 3.121670961380005 }, { "id": "identity_270101", "image": "TabMWP/images/14089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bins in the sale section of the toy store had a toy count of at least 11 but less than 36?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.0, "loss": 2.3541581630706787 }, { "id": "identity_123565", "image": "PMC-VQA/images/PMC5658582_mbt212678-fig-0002_231982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the structure labeled B in the image?" }, { "from": "gpt", "value": "The answer is Diagonally from top left to bottom right" } ], "clip_score": 16.84375, "loss": 2.7117996215820312 }, { "id": "identity_119186", "image": "VQA2.0/images/484273.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cooking pots are visible on the counter?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.15625, "loss": 4.364853858947754 }, { "id": "identity_55113", "image": "MapQA/images/map_9943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest percentage range shown in the legend?" }, { "from": "gpt", "value": "The answer is 55.1%-55.2%" } ], "clip_score": 20.875, "loss": 1.8285881280899048 }, { "id": "identity_22835", "image": "A-OKVQA/images/387895.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does this person have a bag with them?\nChoices:\n(A) lunch\n(B) travelling\n(C) shopping\n(D) hiking" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.375, "loss": 4.332245349884033 }, { "id": "identity_211861", "image": "Super-CLEVR/images/superCLEVR_new_001411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many airplanes are depicted in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.875, "loss": 2.7509219646453857 }, { "id": "identity_292293", "image": "FigureQA/images/47993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Saddle Brown the median with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.625, "loss": 4.525954246520996 }, { "id": "identity_327404", "image": "VizWiz/images/VizWiz_train_00016026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this open?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.0625, "loss": 3.803525924682617 }, { "id": "identity_127569", "image": "A-OKVQA/images/208927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buses are parked in the lot?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.6875, "loss": 2.150238037109375 }, { "id": "identity_312257", "image": "TextVQA/images/2e1b9c0ee2c6254e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this watch?" }, { "from": "gpt", "value": "The answer is hublot" } ], "clip_score": 30.734375, "loss": 3.426246404647827 }, { "id": "identity_98200", "image": "TabMWP/images/22406.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the total revenue from senior tickets sold at the Oakland Zoo is $700, how many senior tickets were sold?" }, { "from": "gpt", "value": "The answer is 100 tickets" } ], "clip_score": 33.625, "loss": 1.9095380306243896 }, { "id": "identity_277951", "image": "UniGeo/images/171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On a rectangular lawn with a length of 12m and a width of 6m, there is a curved asphalt path (with a constant width of 2m). What is the area of the grassy area represented by the blank space?\nChoices:\n(A) 70\n(B) 60\n(C) 48\n(D) 18" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.8125, "loss": 3.254315137863159 }, { "id": "identity_167829", "image": "TabMWP/images/19994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total quantity demanded at the price levels $725 and $775 combined?" }, { "from": "gpt", "value": "The answer is 37,800" } ], "clip_score": 32.84375, "loss": 1.9979984760284424 }, { "id": "identity_171246", "image": "TabMWP/images/34551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the percentage increase in the number of straight-A students from 2010 to 2011?" }, { "from": "gpt", "value": "The answer is 29.41%" } ], "clip_score": 31.40625, "loss": 1.789862871170044 }, { "id": "identity_130314", "image": "A-OKVQA/images/311229.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lanes are visible on the road in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.0, "loss": 2.386197566986084 }, { "id": "identity_155331", "image": "ScienceQA/images/4213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the noticeable feature on the penny?" }, { "from": "gpt", "value": "The answer is It has a hole" } ], "clip_score": 34.8125, "loss": 4.4783148765563965 }, { "id": "identity_2948", "image": "PMC-VQA/images/PMC3765512_F2_230058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the notable finding in the sagittal T1-weighted subtracted dynamic image of the left breast? \nChoices:\n(A) irregular mass \n(B) homogeneous mass \n(C) ascites \n(D) irregular calcifications " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.0625, "loss": 4.295039176940918 }, { "id": "identity_19891", "image": "AI2D/images/1977.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A food web is represented in the diagram below. Which organisms are correctly paired with their roles in this food web?\nChoices:\n(A) ants-consumers\n(B) sun-energy source\n(C) None of above\n(D) All of above" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.015625, "loss": 3.0964303016662598 }, { "id": "identity_278500", "image": "PMC-VQA/images/PMC8199119_jcm-10-02421-f003_466488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the photograph that displays mild Descemet's membrane detachment:\n(A) Photograph A\n(B) Photograph B\n(C) Photograph C\n(D) Photograph D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.59375, "loss": 3.355679988861084 }, { "id": "identity_30544", "image": "PMC-VQA/images/PMC7779423_fig0009_333740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What structures are highlighted in the enlarged region of the image? \nChoices:\n(A) Pons and spinal cord \n(B) Pons and cerebellum \n(C) Cerebellum and middle cerebral artery \n(D) Middle cerebral artery and spinal cord. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.859375, "loss": 3.6947638988494873 }, { "id": "identity_312626", "image": "TQA/images/question_images/sun_layers_6307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost structure inside?\nChoices:\n(A) Convection zone\n(B) Radiative zone\n(C) Photosphere\n(D) Prominence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.40625, "loss": 4.425131797790527 }, { "id": "identity_175543", "image": "IconQA/images/11349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of geometric shape is this?" }, { "from": "gpt", "value": "The answer is A triangular prism" } ], "clip_score": 33.53125, "loss": 2.861400604248047 }, { "id": "identity_23434", "image": "TabMWP/images/38355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For a fundraiser, the children in the art club made greeting cards and kept track of how many they produced. How many children made exactly 2 greeting cards?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 29.796875, "loss": 2.146789789199829 }, { "id": "identity_80183", "image": "UniGeo/images/3077.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AD = BC and ∠PEF = 25°, what is the measure of ∠EPD?" }, { "from": "gpt", "value": "The answer is 77.5°" } ], "clip_score": 30.359375, "loss": 1.774444341659546 }, { "id": "identity_170485", "image": "IconQA/images/52030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the spinner is not pink?" }, { "from": "gpt", "value": "The answer is 1/4 or 25%" } ], "clip_score": 32.25, "loss": 1.1298166513442993 }, { "id": "identity_258775", "image": "MapQA/images/map_8838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border South Carolina, which state has the highest value, and what is that value?" }, { "from": "gpt", "value": "The answer is Georgia with a value of 10" } ], "clip_score": 28.515625, "loss": 1.8898152112960815 }, { "id": "identity_127243", "image": "TextVQA/images/14a86de492158295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people can be seen walking on the street?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 14.59375, "loss": 4.777642250061035 }, { "id": "identity_73609", "image": "AI2D/images/1184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organelle is responsible for producing ATP in the cell?" }, { "from": "gpt", "value": "The answer is Mitochondria" } ], "clip_score": 24.28125, "loss": 1.3647668361663818 }, { "id": "identity_87702", "image": "Geometry3K/images/1485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the figure is made of wire, how much wire is needed to create the figure?" }, { "from": "gpt", "value": "The answer is 18 + 4.5π inches" } ], "clip_score": 29.046875, "loss": 2.949427843093872 }, { "id": "identity_101891", "image": "CLEVR-Math/images/CLEVR_train_064238.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither red nor cubes?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.0, "loss": 2.4886832237243652 }, { "id": "identity_222358", "image": "VQA-AS/images/5304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the book on the shelf?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.953125, "loss": 3.210817337036133 }, { "id": "identity_102343", "image": "IconQA/images/58785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many triangles are in the second row?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.96875, "loss": 2.4501256942749023 }, { "id": "identity_294471", "image": "CLEVR-Math/images/CLEVR_train_017477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 large green blocks, what is the total number of large green blocks?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.40625, "loss": 1.7646719217300415 }, { "id": "identity_50357", "image": "FigureQA/images/25393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of colors represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.1875, "loss": 2.145047187805176 }, { "id": "identity_86192", "image": "UniGeo/images/1030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,A、B、C、D在⊙O上,BC是⊙O的直径.若∠D=36°,则∠ADB的度数是多少?" }, { "from": "gpt", "value": "The answer is 144°" } ], "clip_score": 32.84375, "loss": 1.7112963199615479 }, { "id": "identity_141479", "image": "DocVQA/images/rkww0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the due date for the outline of the assignment?" }, { "from": "gpt", "value": "The answer is April 24, 2002" } ], "clip_score": 24.03125, "loss": 1.9570704698562622 }, { "id": "identity_268163", "image": "GeoQA+/images/5914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, O is the intersection point of the diagonals of quadrilateral ABCD, E is the midpoint of AB, and DE intersects AC at point F. If the area of quadrilateral ABCD is 12, what is the area of triangle DOE?\nChoices:\n(A) 1\n(B) 1.5\n(C) 2\n(D) 2.25" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.3125, "loss": 2.124711751937866 }, { "id": "identity_126772", "image": "Super-CLEVR/images/superCLEVR_new_000467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have the same color as the green car?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.59375, "loss": 2.6066672801971436 }, { "id": "identity_296946", "image": "PMC-VQA/images/PMC7274469_pntd.0008141.g002_184814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the mass located within the lung?\nChoices:\n(A) Left lobe\n(B) Right lobe\n(C) Middle lobe\n(D) Entire lung" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.1875, "loss": 4.6939520835876465 }, { "id": "identity_124387", "image": "A-OKVQA/images/195645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of footwear is the person second from the left wearing?" }, { "from": "gpt", "value": "The answer is Sandals" } ], "clip_score": 14.5234375, "loss": 3.3141825199127197 }, { "id": "identity_296873", "image": "PMC-VQA/images/PMC7328922_F2_202997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the white arrow represent in the image? \nOptions:\n(A) the size of the tumor \n(B) the position of the tumor \n(C) the form of the tumor \n(D) the color of the tumor" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.078125, "loss": 5.259801387786865 }, { "id": "identity_305820", "image": "MapQA/images/map_1741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in this region?" }, { "from": "gpt", "value": "The answer is 3080653-3309563" } ], "clip_score": 26.1875, "loss": 2.526639461517334 }, { "id": "identity_253969", "image": "GeoQA+/images/4962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB∥CD,∠A=50°,∠C=20°,若∠E的平分线与AB交于点O,求∠AOD的大小。" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 32.625, "loss": 1.8679741621017456 }, { "id": "identity_267515", "image": "UniGeo/images/1945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, point C lies on the line segment AB. If AC is 4 units long, BC is 6 units long, and points M and N are the midpoints of line segments AC and BC respectively, what is the length of segment MN?\nChoices:\n(A) 2\n(B) 3\n(C) 10\n(D) 5" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.015625, "loss": 2.4357123374938965 }, { "id": "identity_143329", "image": "PlotQA/images/52134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Kyrgyz Republic make the lowest payment?" }, { "from": "gpt", "value": "The answer is 2007" } ], "clip_score": 23.84375, "loss": 1.4962929487228394 }, { "id": "identity_335571", "image": "ScienceQA/images/9929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What material is this container made of?\nChoices:\n(A) concrete\n(B) plastic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.828125, "loss": 3.9914143085479736 }, { "id": "identity_139927", "image": "DocVQA/images/xnnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the applicant?" }, { "from": "gpt", "value": "The answer is Martin Joseph Reape III" } ], "clip_score": 24.9375, "loss": 4.564133644104004 }, { "id": "identity_327991", "image": "FigureQA/images/74007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the most variable?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.9375, "loss": 4.1799421310424805 }, { "id": "identity_32871", "image": "VizWiz/images/VizWiz_train_00016957.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the grayest clouds this morning look like storm clouds or just regular clouds?" }, { "from": "gpt", "value": "The answer is regular clouds" } ], "clip_score": 22.671875, "loss": 2.457784652709961 }, { "id": "identity_97530", "image": "IconQA/images/102094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total time difference between when Fred got the mail and when Tony got the mail?" }, { "from": "gpt", "value": "The answer is 2 hours" } ], "clip_score": 32.78125, "loss": 1.9727176427841187 }, { "id": "identity_279871", "image": "FigureQA/images/79158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the curve with the largest area?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.96875, "loss": 4.526287078857422 }, { "id": "identity_131090", "image": "Super-CLEVR/images/superCLEVR_new_001179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the airplane in the bottom left corner?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.8125, "loss": 2.8128550052642822 }, { "id": "identity_220668", "image": "A-OKVQA/images/197163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation is seen in the background?" }, { "from": "gpt", "value": "The answer is Grassland with scattered trees" } ], "clip_score": 13.1953125, "loss": 3.2160637378692627 }, { "id": "identity_248094", "image": "DocVQA/images/tqvc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the letter and who is it addressed to?" }, { "from": "gpt", "value": "The answer is 16 May 1957, Dr. John B. Youmans" } ], "clip_score": 28.953125, "loss": 2.2504658699035645 }, { "id": "identity_314267", "image": "DocVQA/images/grvv0228_24.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Salary Schedule for this person in 1979?" }, { "from": "gpt", "value": "The answer is 13,200" } ], "clip_score": 26.0, "loss": 1.9683616161346436 }, { "id": "identity_104863", "image": "A-OKVQA/images/245793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown on the image?" }, { "from": "gpt", "value": "The answer is 18:19" } ], "clip_score": 13.4765625, "loss": 2.06026029586792 }, { "id": "identity_235427", "image": "FigureQA/images/15088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the pie chart is represented by Light Seafoam?" }, { "from": "gpt", "value": "The answer is 33.33%" } ], "clip_score": 39.09375, "loss": 1.2109203338623047 }, { "id": "identity_276976", "image": "FigureQA/images/48888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Purple not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.09375, "loss": 3.9369568824768066 }, { "id": "identity_277160", "image": "VizWiz/images/VizWiz_train_00018331.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On what temperature is the toaster oven currently set?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 30.828125, "loss": 3.74802827835083 }, { "id": "identity_234098", "image": "AI2D/images/3244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter represents the gel-like fluid inside the cell where the organelles are suspended, and also identify the letter representing the site where proteins are synthesized?" }, { "from": "gpt", "value": "The answer is D and A" } ], "clip_score": 27.875, "loss": 2.8855984210968018 }, { "id": "identity_96914", "image": "TabMWP/images/27034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a passenger needs to be at Milford by 2:00 P.M., what is the latest time they can leave Fairfax?" }, { "from": "gpt", "value": "The answer is 1:00 P.M" } ], "clip_score": 23.78125, "loss": 1.3950157165527344 }, { "id": "identity_264225", "image": "VQA-AS/images/16150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which individual is pursuing the cat?" }, { "from": "gpt", "value": "The answer is dog" } ], "clip_score": 24.234375, "loss": 4.115078926086426 }, { "id": "identity_162847", "image": "GeoQA+/images/3223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment AC?" }, { "from": "gpt", "value": "The answer is AC = sqrt(32)" } ], "clip_score": 26.609375, "loss": 2.3191845417022705 }, { "id": "identity_328551", "image": "ScienceQA/images/14561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the question that Lamar's experiment can most effectively address.\nChoices:\n(A) Can pennies hold more drops of pure water or water mixed with hand soap?\n(B) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.0625, "loss": 3.368598461151123 }, { "id": "identity_171480", "image": "IconQA/images/52680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of leaves to snails?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 30.921875, "loss": 1.5071630477905273 }, { "id": "identity_24075", "image": "PMC-VQA/images/PMC7506806_Fig2_258627.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the possible diagnosis of the patient?\nChoices:\n(A) Lung cancer \n(B) Common cold \n(C) Malaria \n(D) Tuberculosis" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.578125, "loss": 3.888484477996826 }, { "id": "identity_73523", "image": "ScienceQA/images/7612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible water source in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.21875, "loss": 2.6621174812316895 }, { "id": "identity_215187", "image": "A-OKVQA/images/198437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the saddle pad under the saddle?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.234375, "loss": 3.3159496784210205 }, { "id": "identity_68397", "image": "ScienceQA/images/5395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the plainfin midshipman play in this marine food web?" }, { "from": "gpt", "value": "The answer is Primary consumer" } ], "clip_score": 28.71875, "loss": 4.414675235748291 }, { "id": "identity_177178", "image": "IconQA/images/83363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total time spent by Jim on watching a movie and writing a story?" }, { "from": "gpt", "value": "The answer is 1 hour" } ], "clip_score": 35.28125, "loss": 2.3703324794769287 }, { "id": "identity_240875", "image": "TabMWP/images/1543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Pete need to buy a dog calendar, an atlas, and a dictionary?" }, { "from": "gpt", "value": "The answer is 67" } ], "clip_score": 31.140625, "loss": 3.1622860431671143 }, { "id": "identity_218895", "image": "VQA2.0/images/367715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the background?" }, { "from": "gpt", "value": "The answer is More than three trees" } ], "clip_score": 12.359375, "loss": 3.4628117084503174 }, { "id": "identity_196403", "image": "PMC-VQA/images/PMC4559303_Fig3_420544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible blockage or narrowing in the blood vessels shown in image A?" }, { "from": "gpt", "value": "The answer is No blockage or narrowing visible" } ], "clip_score": 23.03125, "loss": 2.6671342849731445 }, { "id": "identity_166802", "image": "IconQA/images/70791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the ruler's length does the line cover?" }, { "from": "gpt", "value": "The answer is 2/5" } ], "clip_score": 32.375, "loss": 1.6809208393096924 }, { "id": "identity_239629", "image": "ScienceQA/images/7990.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the rusting time of steel nails compare when submerged in water, vinegar, and saltwater?" }, { "from": "gpt", "value": "The answer is Nails rust fastest in vinegar, then saltwater, then water" } ], "clip_score": 25.234375, "loss": 2.128805160522461 }, { "id": "identity_63543", "image": "TQA/images/abc_question_images/human_system_ear_12880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the auditory canal?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.828125, "loss": 2.5689902305603027 }, { "id": "identity_139808", "image": "DVQA/images/bar_train_00031104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more people prefer beach over hurry?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 25.34375, "loss": 2.3214282989501953 }, { "id": "identity_78580", "image": "TQA/images/question_images/muscle_fiber_7090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Myelin sheath\"?" }, { "from": "gpt", "value": "The answer is Insulates nerve cells and speeds up nerve impulses" } ], "clip_score": 32.0, "loss": 1.7258635759353638 }, { "id": "identity_246927", "image": "A-OKVQA/images/508896.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of the trick the man is doing on the half pipe, and how do they differentiate it from the other types of tricks listed?" }, { "from": "gpt", "value": "The answer is Lip trick involves balancing on the lip of the half pipe" } ], "clip_score": 28.15625, "loss": 3.593670129776001 }, { "id": "identity_269404", "image": "Super-CLEVR/images/superCLEVR_new_000760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the large blue shiny object and the large brown shiny object of the same shape?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 13.4765625, "loss": 5.510934352874756 }, { "id": "identity_204391", "image": "VQA-AS/images/1588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any fish visible in the pond?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.796875, "loss": 3.2432303428649902 }, { "id": "identity_336937", "image": "A-OKVQA/images/383930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person wearing?\nChoices:\n(A) bow tie\n(B) hat\n(C) scarf\n(D) suspenders" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.3046875, "loss": 4.624262809753418 }, { "id": "identity_285400", "image": "PlotQA/images/83035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the gap between the highest and lowest merchandise exports?" }, { "from": "gpt", "value": "The answer is 4.818503028425251" } ], "clip_score": 24.359375, "loss": 2.693363666534424 }, { "id": "identity_237710", "image": "FigureQA/images/53222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the largest and smallest segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 30.890625, "loss": 1.8117849826812744 }, { "id": "identity_228718", "image": "VizWiz/images/VizWiz_train_00005289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total regular price of all the items listed on this page, and what is the total sale price if all coupons are used?" }, { "from": "gpt", "value": "The answer is Regular price: $366.92, Sale price: $197" } ], "clip_score": 24.15625, "loss": 2.0517303943634033 }, { "id": "identity_232687", "image": "FigureQA/images/74335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the least variation in its y-axis values over the entire x-axis range?" }, { "from": "gpt", "value": "The answer is Orchid and Medium Orchid" } ], "clip_score": 33.5625, "loss": 2.05853533744812 }, { "id": "identity_92177", "image": "IconQA/images/42781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows of tomatoes are there?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.609375, "loss": 2.4230899810791016 }, { "id": "identity_93503", "image": "TabMWP/images/35957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many racks have more than 50 ties but less than 70 ties?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.328125, "loss": 2.682030439376831 }, { "id": "identity_236526", "image": "GeoQA+/images/3688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,点C在⊙O上,半径OD∥AC,如果∠BOD=130°,那么∠BAC的度数是多少?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 33.0, "loss": 1.8561073541641235 }, { "id": "identity_257778", "image": "TabMWP/images/30541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of pages written by the author over the past 6 days?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 32.75, "loss": 2.815044403076172 }, { "id": "identity_157738", "image": "TQA/images/question_images/hydrocarbons_8090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hydrogen atoms in the molecule of butane?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.5, "loss": 1.7293798923492432 }, { "id": "identity_292654", "image": "TabMWP/images/32483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likelihood that a randomly chosen key from Kendall's box, which is labeled with a green sticker, opens the front door of the house?" }, { "from": "gpt", "value": "The answer is 1/5" } ], "clip_score": 28.0625, "loss": 2.055727481842041 }, { "id": "identity_177145", "image": "TabMWP/images/31287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern continues, what would be the y-value when x=4?" }, { "from": "gpt", "value": "The answer is -1" } ], "clip_score": 27.546875, "loss": 3.3786087036132812 }, { "id": "identity_62932", "image": "TQA/images/question_images/optics_ray_diagrams_9175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the focal length of the lens if the distance between the object and the image is 10 cm?" }, { "from": "gpt", "value": "The answer is 2.5 cm" } ], "clip_score": 30.5, "loss": 1.4314320087432861 }, { "id": "identity_223424", "image": "VQA2.0/images/104550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color scheme of the image?" }, { "from": "gpt", "value": "The answer is Black and white with a touch of color" } ], "clip_score": 23.0625, "loss": 2.4696033000946045 }, { "id": "identity_334187", "image": "IconQA/images/89710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There are 7 snowflakes in the top frame. How many snowflakes are in the bottom frame?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 36.40625, "loss": 2.220851182937622 }, { "id": "identity_47287", "image": "DocVQA/images/hqvf0065_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the entree choice for the lunch meeting?" }, { "from": "gpt", "value": "The answer is John Lunsford's Choice" } ], "clip_score": 20.125, "loss": 3.6391983032226562 }, { "id": "identity_9246", "image": "PMC-VQA/images/PMC7913723_diagnostics-11-00230-f003_376942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique is seen in the figure?\nChoices:\n(A) MRI \n(B) CT scan \n(C) PET scan \n(D) X-ray" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5, "loss": 4.2643890380859375 }, { "id": "identity_261796", "image": "ChartQA/images/multi_col_100828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the term \"blue bar data\" signify?" }, { "from": "gpt", "value": "The answer is Canada" } ], "clip_score": 24.625, "loss": 3.8085906505584717 }, { "id": "identity_234909", "image": "CLEVR-Math/images/CLEVR_train_030581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are left after removing all yellow spheres and green blocks, and then adding two new spheres?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.515625, "loss": 2.2546160221099854 }, { "id": "identity_46390", "image": "FigureQA/images/71755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines on the graph have a value of 53 at the 80 mark on the x-axis?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.46875, "loss": 2.292332410812378 }, { "id": "identity_164737", "image": "Geometry3K/images/45.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle YWZ?" }, { "from": "gpt", "value": "The answer is 140 degrees" } ], "clip_score": 33.4375, "loss": 1.9342596530914307 }, { "id": "identity_165684", "image": "GeoQA+/images/4432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, D, E, and F are midpoints of sides BC, CA, and AB respectively. If angle BAC is 60 degrees, what is the measure of angle EDF?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 34.8125, "loss": 1.4785637855529785 }, { "id": "identity_96669", "image": "TabMWP/images/4790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of patients seen by the doctor over the four months?" }, { "from": "gpt", "value": "The answer is 630.75" } ], "clip_score": 32.40625, "loss": 2.0900895595550537 }, { "id": "identity_88736", "image": "GeoQA+/images/2209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with D and E as midpoints of BC and AD respectively, and EF twice the length of FC, what is the length of segment EF if the length of BC is 8?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 33.1875, "loss": 2.6684916019439697 }, { "id": "identity_140655", "image": "DVQA/images/bar_train_00159604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest individual bar value?" }, { "from": "gpt", "value": "The answer is murder" } ], "clip_score": 26.765625, "loss": 3.5401618480682373 }, { "id": "identity_323090", "image": "IconQA/images/83449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many items are there?\nChoices:\n(A) 3\n(B) 1\n(C) 4\n(D) 2\n(E) 5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.40625, "loss": 3.1833205223083496 }, { "id": "identity_324506", "image": "FigureQA/images/40529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this greater than Crimson?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.84375, "loss": 4.233548164367676 }, { "id": "identity_31866", "image": "DVQA/images/bar_train_00100050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy?" }, { "from": "gpt", "value": "The answer is use" } ], "clip_score": 33.28125, "loss": 2.9133732318878174 }, { "id": "identity_327329", "image": "PMC-VQA/images/PMC6584983_Fig1_496312.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was not seen in the abdominal X-ray?\nChoices:\n(A) air in the stomach \n(B) multiple air-fluid levels in the loops \n(C) free gas under the diaphragm \n(D) air in the colon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.84375, "loss": 3.80100679397583 }, { "id": "identity_186194", "image": "A-OKVQA/images/251723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plant is placed on the desk near the center of the image?" }, { "from": "gpt", "value": "The answer is A potted plant" } ], "clip_score": 20.171875, "loss": 2.4266152381896973 }, { "id": "identity_156358", "image": "ScienceQA/images/18908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What defense mechanisms might the organism in the image possess?" }, { "from": "gpt", "value": "The answer is Aposematism, toxic secretions, and autotomy (shedding body parts)" } ], "clip_score": 20.078125, "loss": 2.3355817794799805 }, { "id": "identity_94251", "image": "CLEVR-Math/images/CLEVR_train_021877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are shiny?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 25.265625, "loss": 2.3473570346832275 }, { "id": "identity_155318", "image": "ScienceQA/images/11573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the rope depicted in a state ready for use or storage?" }, { "from": "gpt", "value": "The answer is Storage" } ], "clip_score": 28.28125, "loss": 3.3131415843963623 }, { "id": "identity_40242", "image": "DVQA/images/bar_train_00128070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the 'gun' bars across both categories?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.5625, "loss": 2.364020347595215 }, { "id": "identity_171442", "image": "IconQA/images/23747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove one dot from the third row, how many dots will remain in that row?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 32.40625, "loss": 2.3250176906585693 }, { "id": "identity_236100", "image": "IconQA/images/92245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, what is the probability that both will be black?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 26.390625, "loss": 2.304914712905884 }, { "id": "identity_155670", "image": "ScienceQA/images/17087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique items are listed in total between Mary and Lisa?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 24.546875, "loss": 1.8649529218673706 }, { "id": "identity_25788", "image": "TabMWP/images/33783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The board of commerce recorded how many employees work at each company. How many more employees work at Benson and Blakely than at Madison Corporation?" }, { "from": "gpt", "value": "The answer is 747" } ], "clip_score": 30.921875, "loss": 2.5184555053710938 }, { "id": "identity_170258", "image": "TabMWP/images/28955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Hazel buys one of each item, how much would she spend in total?" }, { "from": "gpt", "value": "The answer is $5223.00" } ], "clip_score": 17.4375, "loss": 2.0930683612823486 }, { "id": "identity_21639", "image": "ChartQA/images/3655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the value of the largest bar?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 14.6484375, "loss": 2.2089431285858154 }, { "id": "identity_193380", "image": "TextVQA/images/18d9171f18846806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pants is the person on the left wearing?" }, { "from": "gpt", "value": "The answer is Jeans" } ], "clip_score": 24.0625, "loss": 2.8344852924346924 }, { "id": "identity_287937", "image": "TextVQA/images/9ff36f284db50ec9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the store?" }, { "from": "gpt", "value": "The answer is penney's" } ], "clip_score": 15.7109375, "loss": 1.9583841562271118 }, { "id": "identity_129045", "image": "PMC-VQA/images/PMC7168200_diagnostics-10-00018-f002_154103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical orientation of this image?" }, { "from": "gpt", "value": "The answer is Axial orientation" } ], "clip_score": 32.5625, "loss": 3.6094439029693604 }, { "id": "identity_125256", "image": "A-OKVQA/images/525567.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What street name is visible at the intersection?" }, { "from": "gpt", "value": "The answer is Cambie St" } ], "clip_score": 28.40625, "loss": 3.402378797531128 }, { "id": "identity_67717", "image": "AI2D/images/4044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of gravity in the Sun as shown in the image?" }, { "from": "gpt", "value": "The answer is Inward towards the center" } ], "clip_score": 26.046875, "loss": 2.768376350402832 }, { "id": "identity_105655", "image": "PMC-VQA/images/PMC6825074_Fig6_59899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a measurement scale present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.859375, "loss": 3.0512986183166504 }, { "id": "identity_249778", "image": "A-OKVQA/images/415750.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which mobile brand has the largest display advertisement in the image, and how many people are standing in front of it?" }, { "from": "gpt", "value": "The answer is Samsung, six people" } ], "clip_score": 27.3125, "loss": 3.9652371406555176 }, { "id": "identity_92148", "image": "IconQA/images/44298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individual socks are there?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.3125, "loss": 2.0081160068511963 }, { "id": "identity_22425", "image": "A-OKVQA/images/246183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What electronics company made the blue balloons?\nChoices:\n(A) samsung\n(B) apple\n(C) sony\n(D) microsoft" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.5859375, "loss": 3.1641623973846436 }, { "id": "identity_241793", "image": "TextVQA/images/7d15b68bbae4c880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the digits of the route number shown on the sign?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 21.828125, "loss": 2.448413372039795 }, { "id": "identity_83012", "image": "GeoQA+/images/5504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of AG?" }, { "from": "gpt", "value": "The answer is sqrt(8)" } ], "clip_score": 30.96875, "loss": 2.3525426387786865 }, { "id": "identity_9941", "image": "DVQA/images/bar_train_00081262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart presented in a logarithmic scale?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 24.015625, "loss": 3.665050506591797 }, { "id": "identity_138127", "image": "DocVQA/images/htlb0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What university is Dr. Wm. J. Darby associated with?" }, { "from": "gpt", "value": "The answer is Vanderbilt University" } ], "clip_score": 28.515625, "loss": 1.462280035018921 }, { "id": "identity_295590", "image": "AI2D/images/1665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During a solar eclipse, which object is closest to the sun?\nChoices:\n(A) Umbra\n(B) Penumbra\n(C) Earth\n(D) Moon" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.828125, "loss": 3.3508293628692627 }, { "id": "identity_192522", "image": "TextVQA/images/afe8be3c30878eb0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible markings or logos on the space shuttle besides its name?" }, { "from": "gpt", "value": "The answer is Yes, a red triangle logo" } ], "clip_score": 25.96875, "loss": 4.126236438751221 }, { "id": "identity_436", "image": "DocVQA/images/srpc0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the memorandum dated on ?" }, { "from": "gpt", "value": "The answer is March 9, 1967" } ], "clip_score": 25.296875, "loss": 1.8108569383621216 }, { "id": "identity_184653", "image": "PMC-VQA/images/PMC5031756_Fig1_75466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level indicated in the image?" }, { "from": "gpt", "value": "The answer is 10 micrometers" } ], "clip_score": 27.984375, "loss": 2.257171392440796 }, { "id": "identity_85110", "image": "UniGeo/images/2723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment AC in the parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is √89" } ], "clip_score": 34.28125, "loss": 3.695439100265503 }, { "id": "identity_20356", "image": "DVQA/images/bar_train_00061396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is mode" } ], "clip_score": 18.734375, "loss": 4.133709907531738 }, { "id": "identity_160475", "image": "Geometry3K/images/1474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side FG in the triangle?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 38.375, "loss": 1.7660833597183228 }, { "id": "identity_191876", "image": "A-OKVQA/images/125117.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shorts worn by the person closest to the fire hydrant?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 32.3125, "loss": 4.262688636779785 }, { "id": "identity_23417", "image": "PMC-VQA/images/PMC4086335_fig1_304253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the imaging technique used to capture the image in (c)? \nChoices:\n(A) T2 SE \n(B) T1 SE \n(C) T1 IR \n(D) T2 FRF " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.296875, "loss": 4.085691452026367 }, { "id": "identity_287186", "image": "AI2D/images/2719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many phases of the moon are depicted in the image?\nChoices:\n(A) 2\n(B) 3\n(C) 1\n(D) 8" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.921875, "loss": 3.0730302333831787 }, { "id": "identity_76321", "image": "TQA/images/question_images/seasons_1042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many equinoxes are shown in the diagram?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.3125, "loss": 2.510056495666504 }, { "id": "identity_300897", "image": "TabMWP/images/26512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In hopes of raising more funds for arts education, some parents in the Greenville School District publicized the current per-student arts education budget. According to the table, what was the rate of change between these two years?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 43.71875, "loss": 3.353963851928711 }, { "id": "identity_244668", "image": "AI2D/images/836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Rank the stages of the insect's life cycle from smallest to largest, excluding the eggs" }, { "from": "gpt", "value": "The answer is Young Nymph, Later Nymph, Adult" } ], "clip_score": 37.25, "loss": 1.262298345565796 }, { "id": "identity_203202", "image": "PMC-VQA/images/PMC7082488_Fig10_138399.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of sinusitis in the CT image?" }, { "from": "gpt", "value": "The answer is No, there is no evidence of sinusitis" } ], "clip_score": 30.421875, "loss": 1.591960072517395 }, { "id": "identity_116846", "image": "TextVQA/images/ce621548b854c803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the device's screen background?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.375, "loss": 3.471545696258545 }, { "id": "identity_253035", "image": "A-OKVQA/images/147879.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lights are visible on the ceiling, and what is the shape of the objects hanging below the lights?" }, { "from": "gpt", "value": "The answer is Approximately 30 lights, umbrella-shaped objects" } ], "clip_score": 29.875, "loss": 2.039206027984619 }, { "id": "identity_81036", "image": "GeoQA+/images/2218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle D is 56 degrees, what is the measure of angle ADB?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 34.25, "loss": 1.8819878101348877 }, { "id": "identity_224713", "image": "Super-CLEVR/images/superCLEVR_new_001214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles in the image, and how many of them are either small rubber airliners or big brown matte buses?" }, { "from": "gpt", "value": "The answer is Total vehicles: 7, Small rubber airliners or big brown matte buses: 3" } ], "clip_score": 25.171875, "loss": 1.2236794233322144 }, { "id": "identity_163045", "image": "GeoQA+/images/234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If triangle ABC is congruent to triangle ADE, and the perimeter of triangle ABC is 18 cm, what is the perimeter of triangle ADE?" }, { "from": "gpt", "value": "The answer is 18 cm" } ], "clip_score": 33.84375, "loss": 1.2702422142028809 }, { "id": "identity_41933", "image": "DVQA/images/bar_train_00183784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the 'fault' category consistently the lowest value across all groups?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.1875, "loss": 3.516292095184326 }, { "id": "identity_169857", "image": "TabMWP/images/36955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of comic strips read over the 7 days?" }, { "from": "gpt", "value": "The answer is 43" } ], "clip_score": 36.3125, "loss": 2.353062868118286 }, { "id": "identity_136390", "image": "ChartQA/images/two_col_100404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many names have a count of individuals greater than 300?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.046875, "loss": 2.7913691997528076 }, { "id": "identity_171751", "image": "TabMWP/images/6453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units does the y-value decrease when the x-value increases from 4 to 5?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.140625, "loss": 2.3516438007354736 }, { "id": "identity_223870", "image": "TabMWP/images/11068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what price does the market reach equilibrium, and what is the equilibrium quantity?" }, { "from": "gpt", "value": "The answer is $305 and 21,100" } ], "clip_score": 23.78125, "loss": 2.4768261909484863 }, { "id": "identity_292645", "image": "Super-CLEVR/images/superCLEVR_new_000647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the big gray object that is constructed from the same material as the small blue airplane?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 24.109375, "loss": 3.1518161296844482 }, { "id": "identity_310510", "image": "FigureQA/images/83076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2421875, "loss": 4.198046684265137 }, { "id": "identity_157238", "image": "TQA/images/question_images/ocean_zones_8124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate depth in meters where sunlight penetration ends?" }, { "from": "gpt", "value": "The answer is 200 meters" } ], "clip_score": 26.171875, "loss": 1.6230899095535278 }, { "id": "identity_123999", "image": "PMC-VQA/images/PMC8618346_viruses-13-02109-f003_121105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the segmented 3D volume that is not red spheres?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.0625, "loss": 3.7425899505615234 }, { "id": "identity_19150", "image": "AI2D/images/4136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is N in the diagram?\nChoices:\n(A) none of the above\n(B) chloroplast\n(C) air space\n(D) mesophyll" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.265625, "loss": 3.844801425933838 }, { "id": "identity_60895", "image": "MapQA/images/map_6753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value within the range of 26-28?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.25, "loss": 2.5389468669891357 }, { "id": "identity_270816", "image": "GeoQA+/images/3721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the trapezoid ABCD shown, where AD is parallel to BC, AD = 3, AB = 5, BC = 9, the perpendicular bisector of CD intersects BC at E. If DE is connected, what is the perimeter of quadrilateral ABED?\nChoices:\n(A) 17\n(B) 18\n(C) 19\n(D) 20" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.46875, "loss": 2.300535202026367 }, { "id": "identity_197166", "image": "PMC-VQA/images/PMC6620105_f0001_6613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any pleural effusion be identified in this chest X-ray?" }, { "from": "gpt", "value": "The answer is No pleural effusion identified" } ], "clip_score": 36.09375, "loss": 2.7555489540100098 }, { "id": "identity_324295", "image": "TabMWP/images/821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For a math project, students in a class recorded the number of pets that each student has. How many students have more than 2 pets?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 34.375, "loss": 2.0760018825531006 }, { "id": "identity_258548", "image": "VQA2.0/images/214059.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the image, and what are their colors?" }, { "from": "gpt", "value": "The answer is three vehicles: two white buses and one blue and white service vehicle" } ], "clip_score": 22.546875, "loss": 2.486011028289795 }, { "id": "identity_336008", "image": "TQA/images/abc_question_images/earth_eclipses_11654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label indicates the penumbra in this diagram?\nChoices:\n(A) S\n(B) E\n(C) D\n(D) H" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.40625, "loss": 3.9296188354492188 }, { "id": "identity_71621", "image": "AI2D/images/647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which hemisphere is experiencing summer during the June Solstice?" }, { "from": "gpt", "value": "The answer is Northern Hemisphere" } ], "clip_score": 33.71875, "loss": 2.4237730503082275 }, { "id": "identity_161444", "image": "GeoQA+/images/862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is 1, what is the length of segment OC?" }, { "from": "gpt", "value": "The answer is √2 or approximately 1.414" } ], "clip_score": 31.984375, "loss": 1.1991279125213623 }, { "id": "identity_47012", "image": "DocVQA/images/xrcw0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source link for this document?" }, { "from": "gpt", "value": "The answer is https://www.industrydocuments.ucsf.edu/docs/xrcw0217" } ], "clip_score": 22.875, "loss": 1.8701034784317017 }, { "id": "identity_165100", "image": "GeoQA+/images/73.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在⊙O中,点A是圆上一点,且∠BAC=30°,若OC是⊙O的半径,则∠BOC的度数是多少?" }, { "from": "gpt", "value": "The answer is 75°" } ], "clip_score": 31.203125, "loss": 2.0577046871185303 }, { "id": "identity_92560", "image": "IconQA/images/81908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you pack 2 more pillows, what will be the new ratio of pillows to sleeping bags?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 26.703125, "loss": 2.0025031566619873 }, { "id": "identity_26606", "image": "A-OKVQA/images/104631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What singer has the same last name as the word that appears on the board?\nChoices:\n(A) eddie money\n(B) gwen stefani\n(C) pink\n(D) johnny cash" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.890625, "loss": 3.691385269165039 }, { "id": "identity_79877", "image": "GeoQA+/images/1868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is 10 units, find the length of segment AB" }, { "from": "gpt", "value": "The answer is approximately 12.36 units" } ], "clip_score": 33.53125, "loss": 1.9242196083068848 }, { "id": "identity_106909", "image": "VQA2.0/images/486711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What body of water can be seen in the distance?" }, { "from": "gpt", "value": "The answer is Bay or sea" } ], "clip_score": 17.34375, "loss": 3.349579334259033 }, { "id": "identity_1247", "image": "VQA-AS/images/9469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the woman sitting or standing?" }, { "from": "gpt", "value": "The answer is standing" } ], "clip_score": 20.328125, "loss": 3.384065628051758 }, { "id": "identity_285094", "image": "GeoQA+/images/2039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given figure, if ∠BOC is 100°, what is the degree measure of ∠A?\nChoices:\n(A) 50°\n(B) 80°\n(C) 100°\n(D) 130°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.0, "loss": 2.6440114974975586 }, { "id": "identity_330215", "image": "TabMWP/images/13532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is this function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.9375, "loss": 4.130471229553223 }, { "id": "identity_264397", "image": "DocVQA/images/lhkv0001_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of contribution?" }, { "from": "gpt", "value": "The answer is $3,000.00" } ], "clip_score": 19.578125, "loss": 1.596176266670227 }, { "id": "identity_175724", "image": "IconQA/images/18857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more stars or rockets in the image?" }, { "from": "gpt", "value": "The answer is They are equal" } ], "clip_score": 26.984375, "loss": 3.395554780960083 }, { "id": "identity_190841", "image": "VQA2.0/images/140943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cross symbol on the pharmacy sign?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 14.9296875, "loss": 3.1382832527160645 }, { "id": "identity_309642", "image": "DocVQA/images/rrcw0217_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fax number here?" }, { "from": "gpt", "value": "The answer is 609/924-6648" } ], "clip_score": 16.25, "loss": 2.1928420066833496 }, { "id": "identity_188770", "image": "PMC-VQA/images/PMC7484669_F4_251931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible signs of consolidation in the lung fields?" }, { "from": "gpt", "value": "The answer is No visible consolidation" } ], "clip_score": 29.203125, "loss": 3.1641955375671387 }, { "id": "identity_187573", "image": "VQA-AS/images/8993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the woman wearing on her feet?" }, { "from": "gpt", "value": "The answer is Red high heels" } ], "clip_score": 17.125, "loss": 3.3687973022460938 }, { "id": "identity_245390", "image": "TextVQA/images/8ca1f379a91ad12d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the current time is 11:30, what will be the time after 2 hours and 45 minutes?" }, { "from": "gpt", "value": "The answer is 2:15 PM" } ], "clip_score": 15.515625, "loss": 1.5853983163833618 }, { "id": "identity_21109", "image": "IconQA/images/57384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use dice to measure the line. The line is about (_) dice long." }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.796875, "loss": 2.5314884185791016 }, { "id": "identity_28984", "image": "TextVQA/images/5af917d6bf6979b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how much are they selling petrified wood for?" }, { "from": "gpt", "value": "The answer is 5.00" } ], "clip_score": 33.21875, "loss": 1.859284520149231 }, { "id": "identity_94381", "image": "TabMWP/images/10272.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years saw a decrease in the number of employees with health care benefits?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 36.71875, "loss": 2.664886474609375 }, { "id": "identity_330654", "image": "TextVQA/images/20a19fc5482d388d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is that stove?" }, { "from": "gpt", "value": "The answer is aga" } ], "clip_score": 27.359375, "loss": 3.875417709350586 }, { "id": "identity_102144", "image": "TabMWP/images/19690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many notebooks are in 3 stacks?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.90625, "loss": 1.7051392793655396 }, { "id": "identity_136608", "image": "DocVQA/images/qggl0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the month and day written at the bottom of the document?" }, { "from": "gpt", "value": "The answer is March 31st" } ], "clip_score": 22.421875, "loss": 1.8220912218093872 }, { "id": "identity_206726", "image": "PMC-VQA/images/PMC7581161_F2_280580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of hydrocephalus in the brain MRI?" }, { "from": "gpt", "value": "The answer is No, there is no indication of hydrocephalus" } ], "clip_score": 33.375, "loss": 1.3237974643707275 }, { "id": "identity_125707", "image": "TextVQA/images/15882612669be448.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What other establishment's sign is visible in the bottom right corner of the image?" }, { "from": "gpt", "value": "The answer is Trump Plaza" } ], "clip_score": 25.515625, "loss": 2.464026927947998 }, { "id": "identity_324273", "image": "ScienceQA/images/18218.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Nashville\n(B) Cheyenne\n(C) Memphis\n(D) Providence" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.625, "loss": 4.319763660430908 }, { "id": "identity_77899", "image": "TQA/images/question_images/nuclear_energy_8104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mass number of the element represented by the symbol Kr in the first generation?" }, { "from": "gpt", "value": "The answer is 94" } ], "clip_score": 22.25, "loss": 2.1358110904693604 }, { "id": "identity_125781", "image": "VQA-AS/images/16285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the closest flower to the bottom left corner of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 21.078125, "loss": 2.5332281589508057 }, { "id": "identity_178956", "image": "IconQA/images/50221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of teddy bears in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 20.953125, "loss": 2.1406447887420654 }, { "id": "identity_163691", "image": "GeoQA+/images/5929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle OAC in the given circle if the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is 10 + 10π/3 units" } ], "clip_score": 33.34375, "loss": 2.2976577281951904 }, { "id": "identity_287895", "image": "TextVQA/images/00672399e1e9e074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the name of the clock \"Detecto\"?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 26.734375, "loss": 3.7997002601623535 }, { "id": "identity_333892", "image": "VQA2.0/images/367814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are these items used for?" }, { "from": "gpt", "value": "The answer is cutting things" } ], "clip_score": 25.6875, "loss": 3.5801808834075928 }, { "id": "identity_109594", "image": "Super-CLEVR/images/superCLEVR_new_000864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car that is behind the gray thing on the right side of the big cyan bicycle that is left of the small rubber bicycle?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 30.953125, "loss": 4.283507347106934 }, { "id": "identity_82034", "image": "UniGeo/images/1143.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of the chord AB is 8 units, what is the distance from point P to chord AB?" }, { "from": "gpt", "value": "The answer is 3 units" } ], "clip_score": 30.03125, "loss": 2.0877137184143066 }, { "id": "identity_260210", "image": "TabMWP/images/11361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of leaf rubbings made by all students in Mrs. Lopez's art class?" }, { "from": "gpt", "value": "The answer is 257" } ], "clip_score": 29.484375, "loss": 2.43837833404541 }, { "id": "identity_300442", "image": "ScienceQA/images/458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the air temperature measured in the highlighted area?\nChoices:\n(A) 3°C\n(B) -17°C\n(C) -4°C" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.46875, "loss": 3.8595237731933594 }, { "id": "identity_195522", "image": "A-OKVQA/images/367969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian figures are painted on the road surface?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.546875, "loss": 3.9299685955047607 }, { "id": "identity_331357", "image": "Super-CLEVR/images/superCLEVR_new_000647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the big gray thing made of the same material as the small blue aeroplane?" }, { "from": "gpt", "value": "The answer is minivan" } ], "clip_score": 23.15625, "loss": 2.8794591426849365 }, { "id": "identity_218307", "image": "A-OKVQA/images/455156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the batting gloves worn by the player at the plate?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.234375, "loss": 3.4231441020965576 }, { "id": "identity_311722", "image": "VQA2.0/images/24608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this parked?" }, { "from": "gpt", "value": "The answer is no parking zone" } ], "clip_score": 21.578125, "loss": 3.0414586067199707 }, { "id": "identity_333044", "image": "A-OKVQA/images/94132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the child interacting with?\nChoices:\n(A) dog\n(B) train\n(C) egg\n(D) cat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.265625, "loss": 4.4734673500061035 }, { "id": "identity_36240", "image": "FigureQA/images/20820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange less than Midnight Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.34375, "loss": 3.8677258491516113 }, { "id": "identity_61478", "image": "DocVQA/images/hpnb0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the hospital mentioned in the image?" }, { "from": "gpt", "value": "The answer is HOPITAL BICHAT" } ], "clip_score": 17.375, "loss": 2.219398021697998 }, { "id": "identity_144023", "image": "DocVQA/images/zmwm0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned at the top of the letter?" }, { "from": "gpt", "value": "The answer is November 27, 1973" } ], "clip_score": 22.546875, "loss": 1.831228494644165 }, { "id": "identity_223466", "image": "TextVQA/images/0085245e85b9815d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the phones used in the artwork?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.921875, "loss": 2.7717483043670654 }, { "id": "identity_248650", "image": "A-OKVQA/images/110108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person on the right holding, and how does it relate to the giant fish in the air?" }, { "from": "gpt", "value": "The answer is The person is holding a string connected to the kite" } ], "clip_score": 23.25, "loss": 2.6260313987731934 }, { "id": "identity_108855", "image": "A-OKVQA/images/116004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the shop on the left side of the image?" }, { "from": "gpt", "value": "The answer is Paper Shop" } ], "clip_score": 12.5078125, "loss": 3.7205355167388916 }, { "id": "identity_29265", "image": "GeoQA+/images/3150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,∠C=90°,AD平分∠BAC,DE⊥AB于E,DE=3,∠B=30°,则BC=()\nChoices:\n(A) 7\n(B) 8\n(C) 9\n(D) 10" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.3125, "loss": 2.4345028400421143 }, { "id": "identity_278708", "image": "AI2D/images/480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If all the red grouse in the food web diagram were to die, which species would be most affected? \n(A) owl\n(B) heather\n(C) fox\n(D) eagle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.25, "loss": 3.527813673019409 }, { "id": "identity_200180", "image": "VQA2.0/images/186717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of aircraft is docked at the waterfront?" }, { "from": "gpt", "value": "The answer is Seaplane" } ], "clip_score": 32.375, "loss": 2.1479718685150146 }, { "id": "identity_31142", "image": "DocVQA/images/lmfv0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page no mentioned in this document?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 17.546875, "loss": 2.4234304428100586 }, { "id": "identity_56502", "image": "FigureQA/images/56033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the segment with the smallest percentage?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 27.40625, "loss": 3.1651833057403564 }, { "id": "identity_106574", "image": "VQA2.0/images/131580.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of electronic device is on the second shelf?" }, { "from": "gpt", "value": "The answer is An old computer monitor" } ], "clip_score": 16.09375, "loss": 3.229919910430908 }, { "id": "identity_36303", "image": "DocVQA/images/trbb0054_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the timing for YA PRESENTATION by Bruce Clark?" }, { "from": "gpt", "value": "The answer is 12:30 - 2:30" } ], "clip_score": 22.796875, "loss": 1.6088459491729736 }, { "id": "identity_53326", "image": "DocVQA/images/nxvg0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of months for the requested program year?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 19.3125, "loss": 1.9790488481521606 }, { "id": "identity_213547", "image": "TextVQA/images/2cb28d8e3863571c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the common theme of the creations displayed in the image?" }, { "from": "gpt", "value": "The answer is Steampunk-themed creations" } ], "clip_score": 24.953125, "loss": 1.986810564994812 }, { "id": "identity_1821", "image": "DocVQA/images/tyml0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the fourth meeting of the committee on education held?" }, { "from": "gpt", "value": "The answer is September 5, 1945" } ], "clip_score": 31.953125, "loss": 1.5715997219085693 }, { "id": "identity_132281", "image": "VizWiz/images/VizWiz_train_00005293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the sink made of?" }, { "from": "gpt", "value": "The answer is Ceramic" } ], "clip_score": 29.28125, "loss": 2.9291539192199707 }, { "id": "identity_96442", "image": "TabMWP/images/6665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the store wants to maintain an inventory of 600 balloons, how many more balloons do they need to order based on the sales of the past 4 days?" }, { "from": "gpt", "value": "The answer is 372 balloons" } ], "clip_score": 35.9375, "loss": 2.3330729007720947 }, { "id": "identity_8198", "image": "TextVQA/images/0283104d36470fd6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand is on the cup?" }, { "from": "gpt", "value": "The answer is google" } ], "clip_score": 32.875, "loss": 3.5644824504852295 }, { "id": "identity_206350", "image": "VQA2.0/images/65880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the jersey of the player closest to the camera?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 16.546875, "loss": 2.205246686935425 }, { "id": "identity_162495", "image": "GeoQA+/images/937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point D is on arc AC such that AD is twice the length of DC, what is the measure of angle ADC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 26.359375, "loss": 1.605073094367981 }, { "id": "identity_298976", "image": "ScienceQA/images/8290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly describes the transfer of thermal energy during this time: from the surroundings to each aquarium or from each aquarium to the surroundings?\nChoices:\n(A) the surroundings . . . each aquarium\n(B) each aquarium . . . the surroundings" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.765625, "loss": 4.027329921722412 }, { "id": "identity_198474", "image": "PMC-VQA/images/PMC6124378_f1_363672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of dental radiograph is this?" }, { "from": "gpt", "value": "The answer is Periapical radiograph" } ], "clip_score": 33.0625, "loss": 1.739114761352539 }, { "id": "identity_88559", "image": "GeoQA+/images/4666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that D is the midpoint of arc AC and ∠B is 70°, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 20°" } ], "clip_score": 31.609375, "loss": 1.9321500062942505 }, { "id": "identity_260264", "image": "DocVQA/images/xgmf0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what year was the revised edition of the American Heart Association Cookbook, published by David McKay, released?" }, { "from": "gpt", "value": "The answer is 1975" } ], "clip_score": 25.9375, "loss": 1.6364260911941528 }, { "id": "identity_100000", "image": "IconQA/images/60944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the line to the nearest half centimeter?" }, { "from": "gpt", "value": "The answer is 8.5 centimeters" } ], "clip_score": 31.546875, "loss": 1.4975955486297607 }, { "id": "identity_79002", "image": "GeoQA+/images/1402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the perimeter of parallelogram ABCD is 40cm and AB is 2/3 of BC, what is the ratio of the length of AB to the length of AD?" }, { "from": "gpt", "value": "The answer is 2:3" } ], "clip_score": 31.953125, "loss": 1.4892497062683105 }, { "id": "identity_192145", "image": "PMC-VQA/images/PMC7707631_IJMS-45-485-g001.tif_311710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was likely used to capture this brain image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 32.96875, "loss": 2.2223174571990967 }, { "id": "identity_181586", "image": "IconQA/images/44271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many circles are in the second column?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.21875, "loss": 2.6731414794921875 }, { "id": "identity_231873", "image": "TextVQA/images/12e6ffc26e9ba34d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different elements and colors present on the sweater, and how do they contribute to the overall design?" }, { "from": "gpt", "value": "The answer is Text, stripes, emblem, checkerboard, patches; colors: black, white, red, yellow" } ], "clip_score": 26.390625, "loss": 3.0162711143493652 }, { "id": "identity_311015", "image": "DocVQA/images/ykdv0228_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of this bank?" }, { "from": "gpt", "value": "The answer is nashville, tennessee" } ], "clip_score": 25.015625, "loss": 2.8041141033172607 }, { "id": "identity_213521", "image": "A-OKVQA/images/412680.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bread is the sandwich made with?" }, { "from": "gpt", "value": "The answer is Biscuit" } ], "clip_score": 22.9375, "loss": 2.4523816108703613 }, { "id": "identity_228820", "image": "TabMWP/images/17155.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of characters in the books Liam read?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 36.3125, "loss": 3.0253427028656006 }, { "id": "identity_237965", "image": "A-OKVQA/images/255708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of the skiing style the child is doing, and how do they help in controlling speed and direction?" }, { "from": "gpt", "value": "The answer is V-shape, friction, weight distribution, and edge control" } ], "clip_score": 30.453125, "loss": 3.978111743927002 }, { "id": "identity_26638", "image": "DVQA/images/bar_train_00080590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart presented in a percentage scale?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 25.09375, "loss": 3.4931633472442627 }, { "id": "identity_244544", "image": "PMC-VQA/images/PMC4689044_Fig6_455701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are visible in this anteroposterior X-ray of the knee, and what abnormalities can be identified?" }, { "from": "gpt", "value": "The answer is Femur, tibia, fibula, joint space; no visible abnormalities" } ], "clip_score": 33.5, "loss": 3.031749725341797 }, { "id": "identity_218365", "image": "A-OKVQA/images/169026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is driving next to the bus?" }, { "from": "gpt", "value": "The answer is An SUV (Sport Utility Vehicle)" } ], "clip_score": 22.328125, "loss": 1.8122034072875977 }, { "id": "identity_174330", "image": "IconQA/images/101651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the nail from the tip to the start of its head?" }, { "from": "gpt", "value": "The answer is 2 inches" } ], "clip_score": 34.0, "loss": 2.307490110397339 }, { "id": "identity_171121", "image": "IconQA/images/28811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each part of the shape has an area of 5 square units, what is the total area of the pink parts?" }, { "from": "gpt", "value": "The answer is 15 square units" } ], "clip_score": 35.96875, "loss": 1.4559075832366943 }, { "id": "identity_337520", "image": "Geometry3K/images/413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 114\n(B) 118\n(C) 121\n(D) 125" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.125, "loss": 2.5964913368225098 }, { "id": "identity_194405", "image": "TextVQA/images/d383dd516697e184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any monitors turned off in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.078125, "loss": 3.160008430480957 }, { "id": "identity_214945", "image": "PMC-VQA/images/PMC6737727_F3_36291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique is used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 34.78125, "loss": 2.3764631748199463 }, { "id": "identity_18742", "image": "Super-CLEVR/images/superCLEVR_new_001382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the matte object on the left side of the gray thing that is in front of the big brown dirtbike?" }, { "from": "gpt", "value": "The answer is dirtbike" } ], "clip_score": 19.09375, "loss": 3.0068068504333496 }, { "id": "identity_86850", "image": "Geometry3K/images/437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment KH?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 27.28125, "loss": 2.6250827312469482 }, { "id": "identity_107389", "image": "VQA2.0/images/262049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cargo is the train carrying?" }, { "from": "gpt", "value": "The answer is It is not possible to determine the exact type of cargo from the image" } ], "clip_score": 25.078125, "loss": 1.631479024887085 }, { "id": "identity_256127", "image": "VQA-AS/images/8709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it likely to be, given that the sun is low in the sky and the shadows are long?" }, { "from": "gpt", "value": "The answer is late afternoon" } ], "clip_score": 20.265625, "loss": 3.1938533782958984 }, { "id": "identity_171450", "image": "TabMWP/images/1012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teachers have more than 5 computers in their classrooms?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.71875, "loss": 2.4852778911590576 }, { "id": "identity_6508", "image": "Super-CLEVR/images/superCLEVR_new_001151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there an equal number of red shiny motorbikes that are in front of the large green minivan and tiny red metal dirtbikes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 27.203125, "loss": 4.7874040603637695 }, { "id": "identity_248458", "image": "VQA2.0/images/256651.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual cues in the image that suggest the man is enjoying his hot dog, and how do these cues collectively indicate enjoyment?" }, { "from": "gpt", "value": "The answer is Facial expression, body language, contextual clues" } ], "clip_score": 34.0, "loss": 3.371457099914551 }, { "id": "identity_87522", "image": "GeoQA+/images/5989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle 1 is 40 degrees, what is the measure of angle 2?" }, { "from": "gpt", "value": "The answer is angle 2 = 140 degrees" } ], "clip_score": 32.0625, "loss": 1.7561243772506714 }, { "id": "identity_39628", "image": "DocVQA/images/qkcw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date when the assignment was assigned?" }, { "from": "gpt", "value": "The answer is January 24, 2002" } ], "clip_score": 25.8125, "loss": 1.5890206098556519 }, { "id": "identity_171350", "image": "TabMWP/images/8934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of athletes per country according to the stem and leaf plot?" }, { "from": "gpt", "value": "The answer is 90 - 40 = 50" } ], "clip_score": 32.0625, "loss": 2.2780091762542725 }, { "id": "identity_91610", "image": "Geometry3K/images/1195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the central angle that subtends the arc opposite to the arc subtended by the angle measuring 125°?" }, { "from": "gpt", "value": "The answer is 250°" } ], "clip_score": 31.328125, "loss": 2.1940770149230957 }, { "id": "identity_200860", "image": "A-OKVQA/images/435096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of hats are the riders wearing?" }, { "from": "gpt", "value": "The answer is Wide-brimmed sombreros" } ], "clip_score": 25.90625, "loss": 2.4794998168945312 }, { "id": "identity_64099", "image": "ScienceQA/images/15117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different air temperature zones are represented within the outlined area shown?" }, { "from": "gpt", "value": "The answer is 3 zones" } ], "clip_score": 24.65625, "loss": 2.9357008934020996 }, { "id": "identity_233162", "image": "Geometry3K/images/533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the interior angles of a regular pentagon, and what is the measure of each interior angle?" }, { "from": "gpt", "value": "The answer is The sum is 540 degrees, and each angle is 108 degrees" } ], "clip_score": 30.953125, "loss": 1.6735116243362427 }, { "id": "identity_147727", "image": "DocVQA/images/lywd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the review?" }, { "from": "gpt", "value": "The answer is July 13, 1972" } ], "clip_score": 23.109375, "loss": 1.8446093797683716 }, { "id": "identity_77598", "image": "AI2D/images/4413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of leaf has a tip that narrows to a point?" }, { "from": "gpt", "value": "The answer is Acute" } ], "clip_score": 31.671875, "loss": 3.1569173336029053 }, { "id": "identity_153745", "image": "ScienceQA/images/12261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which large body of water is to the east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 22.171875, "loss": 2.4004437923431396 }, { "id": "identity_81278", "image": "UniGeo/images/2055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle P1AA'?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 32.90625, "loss": 1.75663161277771 }, { "id": "identity_87004", "image": "Geometry3K/images/603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a diagonal is drawn in the parallelogram, what would be the length of the diagonal?" }, { "from": "gpt", "value": "The answer is 6.93 ft" } ], "clip_score": 30.515625, "loss": 2.342456579208374 }, { "id": "identity_198619", "image": "PMC-VQA/images/PMC6194678_Fig2_383977.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a fracture in the knee joint shown in the image?" }, { "from": "gpt", "value": "The answer is No evidence of fracture" } ], "clip_score": 33.53125, "loss": 3.227391004562378 }, { "id": "identity_133454", "image": "Super-CLEVR/images/superCLEVR_new_000951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is directly below the largest red airplane?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 21.515625, "loss": 3.556415557861328 }, { "id": "identity_282886", "image": "A-OKVQA/images/574946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which mythical creature is favored by the owner of the pink boat?\nChoices:\n(A) peter pan\n(B) mermaids\n(C) tinkerbell\n(D) pinnocchio" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.96875, "loss": 4.1416754722595215 }, { "id": "identity_251667", "image": "IconQA/images/61055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each paper clip is 2 cm long, what is the total length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 28.65625, "loss": 1.9910937547683716 }, { "id": "identity_147570", "image": "DocVQA/images/pfjh0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date when the order was received?" }, { "from": "gpt", "value": "The answer is 11-12-68" } ], "clip_score": 20.265625, "loss": 1.9190176725387573 }, { "id": "identity_151492", "image": "AI2D/images/592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What transformation occurs between the pupa and adult stages in the lady beetle's life cycle?" }, { "from": "gpt", "value": "The answer is Metamorphosis from pupa to adult" } ], "clip_score": 40.28125, "loss": 2.063286781311035 }, { "id": "identity_168717", "image": "TabMWP/images/28445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of baskets made?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 32.8125, "loss": 3.292567491531372 }, { "id": "identity_314027", "image": "FigureQA/images/83916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this intersect Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.796875, "loss": 3.7911593914031982 }, { "id": "identity_70832", "image": "TQA/images/question_images/parts_neuron_7219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"myelin sheath\"?" }, { "from": "gpt", "value": "The answer is Insulation and faster transmission of nerve impulses" } ], "clip_score": 30.734375, "loss": 2.3220036029815674 }, { "id": "identity_30774", "image": "MapQA/images/map_12127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Rhode Island have the highest value in the Northeast ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.953125, "loss": 2.4821078777313232 }, { "id": "identity_84154", "image": "GeoQA+/images/4439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If triangle ABC is an isosceles triangle with base BC of length 6, and EF is the perpendicular bisector of AC, what is the measure of angle ACF?" }, { "from": "gpt", "value": "The answer is 22.5 degrees" } ], "clip_score": 30.1875, "loss": 1.7431179285049438 }, { "id": "identity_64557", "image": "AI2D/images/3058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"Golgi Apparatus\" refer to in the cell diagram?" }, { "from": "gpt", "value": "The answer is Golgi Apparatus is involved in protein modification and transport" } ], "clip_score": 27.03125, "loss": 2.2407212257385254 }, { "id": "identity_188680", "image": "PMC-VQA/images/PMC5411224_F2_158445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the patient's body is marked in the image?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 27.609375, "loss": 4.18473482131958 }, { "id": "identity_304473", "image": "TabMWP/images/35994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An economist tracked the amount of per-capita aid sent from the U.S. to various countries during the 1900s. In 1975, which place received more aid per person, Libya or Algeria?\n\nChoices:\n(A) Algeria\n(B) Libya" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.5, "loss": 3.3933358192443848 }, { "id": "identity_78684", "image": "AI2D/images/4907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled as the underground reservoir for magma?" }, { "from": "gpt", "value": "The answer is Magma chamber" } ], "clip_score": 26.90625, "loss": 2.0556702613830566 }, { "id": "identity_231620", "image": "TQA/images/question_images/hydrocarbons_7049.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hydrogen atoms are connected to carbon atoms in the given molecule?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 30.296875, "loss": 2.1661455631256104 }, { "id": "identity_42262", "image": "FigureQA/images/99540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines on the graph have a positive slope?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.40625, "loss": 2.4097845554351807 }, { "id": "identity_338042", "image": "DocVQA/images/qgcm0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned here?" }, { "from": "gpt", "value": "The answer is June 4, 1973" } ], "clip_score": 18.109375, "loss": 1.9830281734466553 }, { "id": "identity_262993", "image": "UniGeo/images/2790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, AB is the diameter of circle O, and it is known that ∠ACD = 20°. What is the degree measure of ∠BAD?\nChoices:\n(A) 40°\n(B) 50°\n(C) 60°\n(D) 70°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.90625, "loss": 2.609067916870117 }, { "id": "identity_312563", "image": "DVQA/images/bar_train_00036569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest total units sold across all stores?" }, { "from": "gpt", "value": "The answer is sky" } ], "clip_score": 32.34375, "loss": 3.6329185962677 }, { "id": "identity_148413", "image": "DVQA/images/bar_train_00005899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the \"return\" bar value less than the \"travel\" bar value?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.109375, "loss": 3.3782310485839844 }, { "id": "identity_140055", "image": "DVQA/images/bar_train_00073147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of preference percentage for the object 'shadow'?" }, { "from": "gpt", "value": "The answer is 0 to 20 percent" } ], "clip_score": 29.84375, "loss": 2.0685434341430664 }, { "id": "identity_20844", "image": "VQA-AS/images/14263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man holding?" }, { "from": "gpt", "value": "The answer is frisbee" } ], "clip_score": 30.28125, "loss": 2.4546611309051514 }, { "id": "identity_180885", "image": "TabMWP/images/25690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of one cookbook and one map of the world?" }, { "from": "gpt", "value": "The answer is $29.00" } ], "clip_score": 32.71875, "loss": 1.6676843166351318 }, { "id": "identity_19879", "image": "A-OKVQA/images/295499.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the large girl's t-shirt who is sitting on the bench on the basketball game?\nChoices:\n(A) orange\n(B) white\n(C) green\n(D) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.828125, "loss": 4.124886989593506 }, { "id": "identity_268902", "image": "UniGeo/images/1915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, where angle C is 90° and angle B is 20°, arcs are drawn with A as the center intersecting AB at points M and N. Further arcs are drawn with M and N as centers, with lengths greater than MN as radii. The two arcs intersect at point P, and a line is drawn connecting AP and extended to point D. What is the degree measure of angle ADB?\nChoices:\n(A) 75°\n(B) 105°\n(C) 110°\n(D) 125°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.28125, "loss": 2.3736305236816406 }, { "id": "identity_56890", "image": "ChartQA/images/two_col_348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of GDP in the year 2000?" }, { "from": "gpt", "value": "The answer is 1,060.9 billion U.S. dollars" } ], "clip_score": 22.640625, "loss": 1.9458893537521362 }, { "id": "identity_270992", "image": "TextVQA/images/6ad84631a4e64421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which year is located at the bottom?" }, { "from": "gpt", "value": "The answer is 2007" } ], "clip_score": 13.1015625, "loss": 1.5919462442398071 }, { "id": "identity_72077", "image": "AI2D/images/4684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bird has the widest beak base?" }, { "from": "gpt", "value": "The answer is Geospiza magnirostris" } ], "clip_score": 31.328125, "loss": 2.0679097175598145 }, { "id": "identity_122057", "image": "VQA-AS/images/458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tree is on the left side of the image?" }, { "from": "gpt", "value": "The answer is Oak tree" } ], "clip_score": 18.78125, "loss": 2.690234422683716 }, { "id": "identity_297126", "image": "TabMWP/images/30051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the data provided in the table, what was the difference in the number of leftover loaves of bread between Wednesday and Thursday for the vendor at the farmers' market?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 41.03125, "loss": 2.820315361022949 }, { "id": "identity_234375", "image": "GeoQA+/images/3181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AD∥BE∥CF,直线m,n与这三条平行线分别交于点A,B,C和点D,E,F,已知AB=5,BC=10,DE=4,若三角形ABC的面积为30,则三角形DEF的面积为多少?" }, { "from": "gpt", "value": "The answer is 19.2" } ], "clip_score": 28.734375, "loss": 2.2675857543945312 }, { "id": "identity_200351", "image": "PMC-VQA/images/PMC6410505_Fig1_448290.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to capture these images?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT) scans" } ], "clip_score": 28.953125, "loss": 1.8638170957565308 }, { "id": "identity_266062", "image": "IconQA/images/66890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the shape from the following options: cone, cube, or sphere.\nChoices:\n(A) cone\n(B) cube\n(C) sphere" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.5, "loss": 4.094611167907715 }, { "id": "identity_306465", "image": "FigureQA/images/49058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Salmon the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.125, "loss": 3.936586856842041 }, { "id": "identity_118592", "image": "A-OKVQA/images/44504.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any product with a blue cap visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.640625, "loss": 3.102121591567993 }, { "id": "identity_207510", "image": "PMC-VQA/images/PMC6694561_Fig5_23766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are predominantly visible in this image?" }, { "from": "gpt", "value": "The answer is Smooth muscle cells" } ], "clip_score": 23.1875, "loss": 3.1857666969299316 }, { "id": "identity_262478", "image": "A-OKVQA/images/208511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is there a netting behind the batter?\nChoices:\n(A) safety\n(B) practice\n(C) decoration\n(D) style" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.875, "loss": 4.382620811462402 }, { "id": "identity_88904", "image": "UniGeo/images/2483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if AE:EB=1:2 and the area of triangle AEF is 6, what is the perimeter of parallelogram ABCD if AB=9?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 36.03125, "loss": 2.0428240299224854 }, { "id": "identity_103844", "image": "VQA2.0/images/69498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrella in the background?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.7265625, "loss": 3.8892695903778076 }, { "id": "identity_178217", "image": "IconQA/images/19966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are squares?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.546875, "loss": 2.5647242069244385 }, { "id": "identity_102948", "image": "TabMWP/images/15039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries have exactly 50 athletes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.546875, "loss": 2.364534378051758 }, { "id": "identity_118530", "image": "TextVQA/images/1b1e3f57e92fdb2c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cargo train?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.15625, "loss": 3.8079020977020264 }, { "id": "identity_174882", "image": "IconQA/images/70137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are neither green nor circles?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.921875, "loss": 2.546539068222046 }, { "id": "identity_93925", "image": "CLEVR-Math/images/CLEVR_train_041044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red objects are present?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.78125, "loss": 2.268752336502075 }, { "id": "identity_47335", "image": "DVQA/images/bar_train_00122464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the 'tube' category 'talk'?" }, { "from": "gpt", "value": "The answer is 10^2 to 10^4" } ], "clip_score": 22.03125, "loss": 1.6340984106063843 }, { "id": "identity_108570", "image": "A-OKVQA/images/163479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more people standing than sitting on the beach?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.609375, "loss": 2.8882381916046143 }, { "id": "identity_71059", "image": "ScienceQA/images/3684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What characteristic feature can be observed in the rock shown in the image that indicates its metamorphic nature?" }, { "from": "gpt", "value": "The answer is Fused and interlocked grains" } ], "clip_score": 30.421875, "loss": 4.208937644958496 }, { "id": "identity_136104", "image": "DocVQA/images/xqjf0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many critical success factors are listed in the document?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.25, "loss": 2.1220247745513916 }, { "id": "identity_282919", "image": "PMC-VQA/images/PMC7256210_fig0020_180408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the caption, where is the recurrent OKC located among the following choices?\n(A) Left maxillary sinus\n(B) Anterior wall of right maxillary sinus\n(C) Posterior wall of right maxillary sinus\n(D) Frontal sinus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.9375, "loss": 4.2931108474731445 }, { "id": "identity_261741", "image": "A-OKVQA/images/389401.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the purpose of the black object connecting the bike to the pole in terms of its function?\nChoices:\n(A) pulley\n(B) wrench\n(C) ramp\n(D) lock" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.171875, "loss": 4.380581378936768 }, { "id": "identity_175917", "image": "IconQA/images/43042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern of the sequence?" }, { "from": "gpt", "value": "The answer is bear, train, X" } ], "clip_score": 19.390625, "loss": 3.6706466674804688 }, { "id": "identity_263843", "image": "UniGeo/images/572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, with circle O and radius OC perpendicular to chord AB, point D lies on minor arc AB (not coinciding with points A, B). If ∠AOC = 50°, what is the measure of ∠CDB? \n(A) 25°\n(B) 30°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.59375, "loss": 2.1615653038024902 }, { "id": "identity_92420", "image": "TabMWP/images/10428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magazines have more than 20 articles?" }, { "from": "gpt", "value": "The answer is 7 magazines" } ], "clip_score": 36.5625, "loss": 2.225735664367676 }, { "id": "identity_236911", "image": "DVQA/images/bar_train_00056941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item with the highest sales in the \"stay\" store across all items?" }, { "from": "gpt", "value": "The answer is 30 units" } ], "clip_score": 28.421875, "loss": 2.2084031105041504 }, { "id": "identity_246662", "image": "VQA2.0/images/85501.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different platform letters are shown, and what are they?" }, { "from": "gpt", "value": "The answer is four: A, B, C, and D" } ], "clip_score": 13.4140625, "loss": 1.7647818326950073 }, { "id": "identity_157745", "image": "AI2D/images/1834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many herbivores are depicted in this food web?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.34375, "loss": 2.671935796737671 }, { "id": "identity_242924", "image": "PMC-VQA/images/PMC5240427_Fig3_115040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the organ with the second highest FDG uptake and describe its anatomical location relative to the organ with the highest uptake" }, { "from": "gpt", "value": "The answer is Liver, located inferior to the lungs" } ], "clip_score": 22.96875, "loss": 3.279376268386841 }, { "id": "identity_281638", "image": "VQA2.0/images/435454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time shown on the clock?" }, { "from": "gpt", "value": "The answer is 3:55" } ], "clip_score": 11.921875, "loss": 1.9436787366867065 }, { "id": "identity_249046", "image": "PMC-VQA/images/PMC5225184_Fig1_112654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brain region(s) are indicated by the green color in the image, and what are their coordinates in the X, Y, and Z planes?" }, { "from": "gpt", "value": "The answer is Thalamus, Z = 2" } ], "clip_score": 24.984375, "loss": 3.1949703693389893 }, { "id": "identity_170104", "image": "IconQA/images/73365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock is 30 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 6:00" } ], "clip_score": 31.78125, "loss": 1.8020405769348145 }, { "id": "identity_252513", "image": "IconQA/images/677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Arrange the activities Gabby did on Monday morning in chronological order and calculate the total time spent from the first activity to the last activity" }, { "from": "gpt", "value": "The answer is 2 hours" } ], "clip_score": 23.828125, "loss": 3.1427242755889893 }, { "id": "identity_168808", "image": "IconQA/images/37135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two marbles are chosen at random, what is the probability that both are the same color?" }, { "from": "gpt", "value": "The answer is 2/5" } ], "clip_score": 29.53125, "loss": 2.0328643321990967 }, { "id": "identity_174906", "image": "TabMWP/images/22455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average amount Cora spent on lunch over the five days?" }, { "from": "gpt", "value": "The answer is $5.4" } ], "clip_score": 39.96875, "loss": 2.396003246307373 }, { "id": "identity_97329", "image": "IconQA/images/34609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one shape is removed, how many sides will be left in total?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.546875, "loss": 1.972575306892395 }, { "id": "identity_127103", "image": "PMC-VQA/images/PMC8841721_F2_193712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color predominantly outlines the circular structures in the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.421875, "loss": 3.387678623199463 }, { "id": "identity_278763", "image": "Super-CLEVR/images/superCLEVR_new_001916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects that are either tiny rubber objects located to the left of the tiny metallic wagon or small brown metal cars positioned in front of the small metal mountain bike?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.34375, "loss": 2.1938846111297607 }, { "id": "identity_281062", "image": "FigureQA/images/29307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the median with a high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.265625, "loss": 4.017956733703613 }, { "id": "identity_337158", "image": "MapQA/images/map_4958.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value for this state?" }, { "from": "gpt", "value": "The answer is 651-775" } ], "clip_score": 26.265625, "loss": 1.9818944931030273 }, { "id": "identity_299772", "image": "TextVQA/images/049d516ccb244077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letters are visible on the blouses worn by the girls?" }, { "from": "gpt", "value": "The answer is agi" } ], "clip_score": 18.265625, "loss": 3.906395673751831 }, { "id": "identity_317115", "image": "AI2D/images/2870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which among the below is referred to as a windpipe?\nChoices:\n(A) A\n(B) C\n(C) E\n(D) B" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.7421875, "loss": 3.8800110816955566 }, { "id": "identity_87807", "image": "GeoQA+/images/3047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment AC if BC = 6?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.703125, "loss": 2.790051221847534 }, { "id": "identity_104159", "image": "TextVQA/images/27cbe36c9c756d6a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the background where the numbers are printed on the ruler?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 30.859375, "loss": 2.80183482170105 }, { "id": "identity_334518", "image": "A-OKVQA/images/294176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time is depicted?\nChoices:\n(A) late night\n(B) rush hour\n(C) sunset\n(D) sunrise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.875, "loss": 4.538092613220215 }, { "id": "identity_112306", "image": "VQA2.0/images/117684.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the team represented by the neon sign in the image?" }, { "from": "gpt", "value": "The answer is Orioles" } ], "clip_score": 25.8125, "loss": 2.880096197128296 }, { "id": "identity_141230", "image": "DocVQA/images/kgyd0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is James E. Murray" } ], "clip_score": 23.390625, "loss": 3.566425085067749 }, { "id": "identity_106888", "image": "PMC-VQA/images/PMC6890341_F5_77829.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of surgical intervention on the right hip?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.46875, "loss": 3.2585959434509277 }, { "id": "identity_135583", "image": "PlotQA/images/47486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people living with HIV who are receiving antiretroviral therapy in Lebanon in 2001?" }, { "from": "gpt", "value": "The answer is 4%" } ], "clip_score": 33.875, "loss": 2.149156093597412 }, { "id": "identity_292818", "image": "TextVQA/images/1d46818a0c8a0158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much does the bottle with the blue label cost?" }, { "from": "gpt", "value": "The answer is 11.99" } ], "clip_score": 24.84375, "loss": 1.7488940954208374 }, { "id": "identity_330456", "image": "A-OKVQA/images/115862.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What gesture are they doing?\nChoices:\n(A) wave\n(B) fist\n(C) hang ten\n(D) salute" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.8984375, "loss": 3.717050075531006 }, { "id": "identity_269781", "image": "PMC-VQA/images/PMC8660657_F2_136124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of scan was conducted on the patient, with the options being CT scan, SWI MRI scan, PET scan, and Ultrasound scan?\nChoices:\n(A) CT scan \n(B) SWI MRI scan \n(C) PET scan \n(D) Ultrasound scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.96875, "loss": 3.7130484580993652 }, { "id": "identity_131051", "image": "TextVQA/images/02748cc67037e249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different brands can be identified in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.3125, "loss": 3.9611103534698486 }, { "id": "identity_100718", "image": "TabMWP/images/38211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one bowl is taken away, how many cherries are left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.578125, "loss": 2.270000696182251 }, { "id": "identity_232839", "image": "ScienceQA/images/11363.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Determine the atomic number of Rhenium and identify its group and period in the periodic table" }, { "from": "gpt", "value": "The answer is Atomic number 75, Group 7, Period 6" } ], "clip_score": 21.53125, "loss": 1.4814162254333496 }, { "id": "identity_134591", "image": "PlotQA/images/68490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of agricultural land for Argentina across the years 1961 to 1964?" }, { "from": "gpt", "value": "The answer is Approximately 30%" } ], "clip_score": 27.515625, "loss": 2.202714443206787 }, { "id": "identity_118403", "image": "TextVQA/images/26f2163d886f14f1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the stands?" }, { "from": "gpt", "value": "The answer is Count of visible people" } ], "clip_score": 11.7421875, "loss": 3.910874366760254 }, { "id": "identity_322287", "image": "PMC-VQA/images/PMC3552813_f4_180745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the black arrows in this image indicate? \nChoices:\n(A) Areas of unfilled tunneling and resorption space \n(B) Fatty bone marrow changes \n(C) Cartilage bars in cortical bone \n(D) Areas of cortical bone loss" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.828125, "loss": 5.165118217468262 }, { "id": "identity_316578", "image": "PMC-VQA/images/PMC6775207_F1_45250.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What brain abnormality is depicted in this image?\nChoices:\n(A) Agenesis of the Corpus Callosum\n(B) Schizencephaly\n(C) Aqueductal Stenosis\n(D) Parkinson's disease" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.9375, "loss": 4.810652256011963 }, { "id": "identity_110419", "image": "TextVQA/images/320ae96161796489.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the team opposing Liverpool in the match?" }, { "from": "gpt", "value": "The answer is Villa" } ], "clip_score": 26.65625, "loss": 2.6337170600891113 }, { "id": "identity_89167", "image": "GeoQA+/images/63.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A,B,C,D四个点均在⊙O上,AO∥DC,∠AOD=20°,则∠COD为多少度? (In the diagram, points A, B, C, and D are all on circle O, AO is parallel to DC, and ∠AOD = 20°, what is the measure of ∠COD?)" }, { "from": "gpt", "value": "The answer is 20°" } ], "clip_score": 33.53125, "loss": 1.7081117630004883 }, { "id": "identity_41826", "image": "DVQA/images/bar_train_00104273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference for 'dollar' between the 'reach' and 'rage' categories?" }, { "from": "gpt", "value": "The answer is 6-4=2 people" } ], "clip_score": 24.8125, "loss": 3.391148567199707 }, { "id": "identity_219131", "image": "A-OKVQA/images/425772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible buoy in the image?" }, { "from": "gpt", "value": "The answer is Yes, orange" } ], "clip_score": 16.171875, "loss": 5.385319709777832 }, { "id": "identity_333121", "image": "DocVQA/images/zzcw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for asking V. Montgomery Rice about submitting an abstract to NBNA/NMA?" }, { "from": "gpt", "value": "The answer is C. Steele" } ], "clip_score": 16.625, "loss": 2.7889599800109863 }, { "id": "identity_284450", "image": "TextVQA/images/92a38413fb3ba70b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which club is this?" }, { "from": "gpt", "value": "The answer is land rover club" } ], "clip_score": 27.3125, "loss": 2.110851764678955 }, { "id": "identity_38894", "image": "FigureQA/images/14319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Blue Violet section larger than the Medium Orchid section?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.25, "loss": 3.287487506866455 }, { "id": "identity_12846", "image": "VQA2.0/images/294811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are they many kites?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.2109375, "loss": 2.6282575130462646 }, { "id": "identity_48220", "image": "FigureQA/images/98628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value of Orchid on the y-axis label?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 23.890625, "loss": 2.53067946434021 }, { "id": "identity_306440", "image": "VQA2.0/images/283884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a large body of water in this picture?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 13.203125, "loss": 3.254779100418091 }, { "id": "identity_235363", "image": "GeoQA+/images/1819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,∠BAC=90°,AB=3,AC=4,P为边BC上一动点,PE⊥AB于E,PF⊥AC于F,求EF的最小值时,点P到点B的距离是多少?" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 29.4375, "loss": 1.7220261096954346 }, { "id": "identity_280028", "image": "DVQA/images/bar_train_00080168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total accuracy when combining the accuracies of the algorithms cheek and inch?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 36.125, "loss": 2.4212145805358887 }, { "id": "identity_215329", "image": "VQA2.0/images/65894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.734375, "loss": 2.277555465698242 }, { "id": "identity_62699", "image": "TQA/images/abc_question_images/earth_eclipses_11627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter U represent in the diagram?" }, { "from": "gpt", "value": "The answer is The Moon" } ], "clip_score": 27.484375, "loss": 4.648891925811768 }, { "id": "identity_154836", "image": "ScienceQA/images/19464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which large body of water is located to the east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Appalachian Mountains (Note: There is no large body of water directly east of Kentucky, but the Appalachian Mountains are a significant geographical feature in that direction)" } ], "clip_score": 28.1875, "loss": 1.448960781097412 }, { "id": "identity_22140", "image": "A-OKVQA/images/523754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many surfboards are there?\nChoices:\n(A) seven\n(B) five\n(C) nine\n(D) four" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.796875, "loss": 3.3629844188690186 }, { "id": "identity_43265", "image": "DocVQA/images/fjlm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the foundation mentioned in the letter?" }, { "from": "gpt", "value": "The answer is The Nutrition Foundation, Inc" } ], "clip_score": 17.984375, "loss": 2.467195510864258 }, { "id": "identity_170695", "image": "TabMWP/images/20180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of red buttons per bag?" }, { "from": "gpt", "value": "The answer is 46" } ], "clip_score": 23.734375, "loss": 2.7983219623565674 }, { "id": "identity_87174", "image": "GeoQA+/images/424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of triangle AEF is 3 cm^2, what is the area of quadrilateral BEFC?" }, { "from": "gpt", "value": "The answer is 6 cm^2" } ], "clip_score": 33.28125, "loss": 1.1326491832733154 }, { "id": "identity_101435", "image": "IconQA/images/32110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are in the orange circle but not in the square circle?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.625, "loss": 2.3871243000030518 }, { "id": "identity_94945", "image": "IconQA/images/14795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertices does this shape have?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.46875, "loss": 2.412855863571167 }, { "id": "identity_45698", "image": "FigureQA/images/97581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by the Royal Blue line on the graph?" }, { "from": "gpt", "value": "The answer is 95" } ], "clip_score": 29.0625, "loss": 2.6590259075164795 }, { "id": "identity_40377", "image": "FigureQA/images/20778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color Orange representing more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 35.53125, "loss": 2.8960914611816406 }, { "id": "identity_1878", "image": "MapQA/images/map_2264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in the USA ?" }, { "from": "gpt", "value": "The answer is 44.3%-45.2%" } ], "clip_score": 25.40625, "loss": 2.100078821182251 }, { "id": "identity_256031", "image": "PMC-VQA/images/PMC7600047_cancers-12-02937-f002_286941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristics of the suspected fungal lesion seen in the MRI image, and how do they differentiate it from the other options (cyst, scar tissue, hematoma)?" }, { "from": "gpt", "value": "The answer is Mixed intensity with hypointense rim, characteristic of a fungal lesion" } ], "clip_score": 29.140625, "loss": 3.482192277908325 }, { "id": "identity_126238", "image": "TextVQA/images/01209963ab93abd9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tie of the person in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.2734375, "loss": 2.820209503173828 }, { "id": "identity_22110", "image": "IconQA/images/2119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Can you trace a triangle with this shape?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.4296875, "loss": 3.8201193809509277 }, { "id": "identity_192327", "image": "TextVQA/images/13a983a9d8bc86e2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the watch?" }, { "from": "gpt", "value": "The answer is Breitling" } ], "clip_score": 33.9375, "loss": 2.455338954925537 }, { "id": "identity_143947", "image": "FigureQA/images/64132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Dark Orange ever surpass the 40 mark on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.09375, "loss": 3.0382332801818848 }, { "id": "identity_115363", "image": "VQA2.0/images/432119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What artistic effect is used throughout the painting?" }, { "from": "gpt", "value": "The answer is Distortion" } ], "clip_score": 22.125, "loss": 2.78314471244812 }, { "id": "identity_285480", "image": "DocVQA/images/rxvh0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the requested and recommended budget for the fiscal year 1976-1977?" }, { "from": "gpt", "value": "The answer is $476,468" } ], "clip_score": 22.296875, "loss": 2.532362937927246 }, { "id": "identity_122536", "image": "VQA2.0/images/150769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flags are visible in the image?" }, { "from": "gpt", "value": "The answer is Two flags" } ], "clip_score": 10.8828125, "loss": 4.4078049659729 }, { "id": "identity_306922", "image": "AI2D/images/290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How does energy move according to this food chain?\nChoices:\n(A) killer whale- sea turtle\n(B) large fish- small fish\n(C) corals- sea turtle\n(D) none of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.96875, "loss": 3.159700870513916 }, { "id": "identity_254443", "image": "PMC-VQA/images/PMC7778613_Fig1_333550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, what is the most likely diagnosis, and what are the potential complications associated with this condition?" }, { "from": "gpt", "value": "The answer is Skull fracture; complications include hemorrhage, infection, brain injury, seizures" } ], "clip_score": 30.453125, "loss": 2.192577600479126 }, { "id": "identity_223276", "image": "Super-CLEVR/images/superCLEVR_new_001310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is the front of the red car facing?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 19.15625, "loss": 3.6121745109558105 }, { "id": "identity_42009", "image": "DVQA/images/bar_train_00080798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the 'root' category in 'lesson' and 'jar' category in 'stop'?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.90625, "loss": 2.694415330886841 }, { "id": "identity_185789", "image": "VQA2.0/images/196061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sailboats are visible in the background?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 25.25, "loss": 2.2833938598632812 }, { "id": "identity_304982", "image": "FigureQA/images/57809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum value for Dark Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.65625, "loss": 4.458822250366211 }, { "id": "identity_10864", "image": "VQA2.0/images/102168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this item a toy or for professionals?" }, { "from": "gpt", "value": "The answer is toy" } ], "clip_score": 18.359375, "loss": 2.329092502593994 }, { "id": "identity_179172", "image": "IconQA/images/37399.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more hearts or sides of hearts?" }, { "from": "gpt", "value": "The answer is Sides" } ], "clip_score": 29.890625, "loss": 2.9101264476776123 }, { "id": "identity_29958", "image": "MapQA/images/map_9439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border Nevada , does Utah have the lowest value ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.015625, "loss": 2.450209856033325 }, { "id": "identity_262314", "image": "GeoQA+/images/3433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, points E and F are the midpoints of BD and CE, respectively. If the area of triangle AEF is 3, what is the area of triangle ABC?\nChoices:\n(A) 9\n(B) 10\n(C) 11\n(D) 12" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.09375, "loss": 2.157416820526123 }, { "id": "identity_217473", "image": "VQA2.0/images/154817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of dispenser is mounted on the wall to the left of the sink?" }, { "from": "gpt", "value": "The answer is Soap dispenser" } ], "clip_score": 28.015625, "loss": 2.1294872760772705 }, { "id": "identity_51987", "image": "FigureQA/images/2743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values represented on the y-axis?" }, { "from": "gpt", "value": "The answer is 0.12" } ], "clip_score": 21.125, "loss": 2.5420215129852295 }, { "id": "identity_6900", "image": "ScienceQA/images/16191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which air temperature was measured within the outlined area shown?\nChoices:\n(A) -18°C\n(B) 12°C\n(C) 5°C" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.0625, "loss": 3.8094820976257324 }, { "id": "identity_147127", "image": "DocVQA/images/lmpv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned on the document?" }, { "from": "gpt", "value": "The answer is December 6, 1967" } ], "clip_score": 22.796875, "loss": 1.9206658601760864 }, { "id": "identity_85591", "image": "GeoQA+/images/693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the rhombus ABCD, if AC = 6 and BD = 4, what is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 2 * arctan(2/3)" } ], "clip_score": 35.78125, "loss": 1.4873124361038208 }, { "id": "identity_286455", "image": "A-OKVQA/images/527098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can you observe a scene similar to what is occurring behind the boats?\nChoices:\n(A) parking garage\n(B) king kullen\n(C) six flags\n(D) costco" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.1875, "loss": 4.224836349487305 }, { "id": "identity_213150", "image": "PMC-VQA/images/PMC5960165_Fig3_316271.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant grey matter structure visible in the center of the image?" }, { "from": "gpt", "value": "The answer is Basal ganglia" } ], "clip_score": 34.78125, "loss": 2.913140058517456 }, { "id": "identity_187262", "image": "PMC-VQA/images/PMC4330234_Fig3_358791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant shade in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 20.84375, "loss": 4.278406620025635 }, { "id": "identity_90885", "image": "Geometry3K/images/1113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side AD in the parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 35.28125, "loss": 2.371778726577759 }, { "id": "identity_163811", "image": "GeoQA+/images/157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if AB = AC = 10 and the area of triangle ABC is 20, what is the length of BC?" }, { "from": "gpt", "value": "The answer is BC = 10.77 (approximated to two decimal places)" } ], "clip_score": 33.5625, "loss": 1.7906677722930908 }, { "id": "identity_306817", "image": "DVQA/images/bar_train_00077048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of island in one category larger than the value of draft in another category?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 30.828125, "loss": 3.58638072013855 }, { "id": "identity_185803", "image": "VQA2.0/images/79077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the boat in the foreground?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 22.734375, "loss": 3.5049920082092285 }, { "id": "identity_105962", "image": "VQA2.0/images/174601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many windows can be seen on the blue building?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.15625, "loss": 2.403963565826416 }, { "id": "identity_332494", "image": "ScienceQA/images/10821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Consider the magnetic force between the magnets in each set. Which of the following statements is accurate?\nChoices:\n(A) The magnitude of the magnetic force is greater in the second set.\n(B) The magnitude of the magnetic force is greater in the first set.\n(C) The magnitude of the magnetic force is the same in both sets." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.40625, "loss": 3.4248924255371094 }, { "id": "identity_162287", "image": "GeoQA+/images/4009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ADC in the given circle diagram?" }, { "from": "gpt", "value": "The answer is 50 degrees" } ], "clip_score": 31.5625, "loss": 2.206068992614746 }, { "id": "identity_193486", "image": "VizWiz/images/VizWiz_train_00010514.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the soup from?" }, { "from": "gpt", "value": "The answer is Sainsbury's" } ], "clip_score": 27.515625, "loss": 2.307237148284912 }, { "id": "identity_149195", "image": "DocVQA/images/gpbf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Lela Evangeline Booher become a Charter Member of the American Institute of Nutrition?" }, { "from": "gpt", "value": "The answer is 1933" } ], "clip_score": 24.0, "loss": 1.8437254428863525 }, { "id": "identity_228175", "image": "FigureQA/images/74505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the highest value at x-axis_label = 50?" }, { "from": "gpt", "value": "The answer is Dark Turquoise" } ], "clip_score": 31.3125, "loss": 1.7700519561767578 }, { "id": "identity_222041", "image": "A-OKVQA/images/494579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any white animals in the image?" }, { "from": "gpt", "value": "The answer is Yes, there are white goats" } ], "clip_score": 20.625, "loss": 2.724841594696045 }, { "id": "identity_118704", "image": "PMC-VQA/images/PMC8128403_f3_443737.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any lettering present in the image?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.953125, "loss": 3.379904270172119 }, { "id": "identity_190497", "image": "TextVQA/images/e16a302650aefcb0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clothing racks are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.140625, "loss": 2.5290682315826416 }, { "id": "identity_276117", "image": "TextVQA/images/55b59f6af596156f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which player is wearing the yellow shirt?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 18.59375, "loss": 1.83968985080719 }, { "id": "identity_246904", "image": "PMC-VQA/images/PMC5684645_F3_239345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the labels used in the images to indicate the locations of both AIS and the soma, and how do these labels help in distinguishing between the two structures?" }, { "from": "gpt", "value": "The answer is mCherry for soma, Axon for AIS" } ], "clip_score": 24.859375, "loss": 3.253973960876465 }, { "id": "identity_58144", "image": "PlotQA/images/46532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the total male labor force aged 20-50 in 2012?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 35.1875, "loss": 1.7928814888000488 }, { "id": "identity_35259", "image": "TabMWP/images/18491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Jasper buys 1 kilogram of plain applesauce. How much does he spend?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.8125, "loss": 3.5467777252197266 }, { "id": "identity_90931", "image": "Geometry3K/images/1181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the parallelogram were to be transformed into a rectangle with the same base and height, what would be the area of the rectangle?" }, { "from": "gpt", "value": "The answer is 104/3" } ], "clip_score": 33.125, "loss": 2.9581618309020996 }, { "id": "identity_331317", "image": "IconQA/images/12831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many of these are there?\nChoices:\n(A) 1\n(B) 2\n(C) 10\n(D) 9\n(E) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.328125, "loss": 2.820312261581421 }, { "id": "identity_208151", "image": "A-OKVQA/images/447082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person seated on the fourth step from the bottom wearing a hat?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 16.59375, "loss": 3.687523603439331 }, { "id": "identity_9633", "image": "PMC-VQA/images/PMC8876065_Fig1_205899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the observed abnormalities in the brain imaging? \nChoices:\n(A) Atrophy of the hippocampus and thalamus \n(B) Enlarged bilateral hippocampus \n(C) Enlarged bilateral ventricle, iron deposition in the bilateral putamen, and a hyperintense putaminal rim \n(D) Abnormalities in the optic nerve. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.125, "loss": 3.4072723388671875 }, { "id": "identity_205319", "image": "A-OKVQA/images/153817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the express train shown in the image?" }, { "from": "gpt", "value": "The answer is The Cathedrals Express" } ], "clip_score": 30.0625, "loss": 1.68435537815094 }, { "id": "identity_266483", "image": "AI2D/images/1847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the given food web and your scientific knowledge, which of the following pairs represents the primary consumer in the food web?\nChoices:\n(A) snail, earthworm\n(B) lady bird, hedgehog\n(C) hedgehog, fox\n(D) hedgehog, stoat" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.09375, "loss": 3.6838455200195312 }, { "id": "identity_99422", "image": "IconQA/images/18815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green objects are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.0, "loss": 2.169858932495117 }, { "id": "identity_97265", "image": "IconQA/images/1206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many crabs are there in the sequence?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.265625, "loss": 2.481559991836548 }, { "id": "identity_71694", "image": "TQA/images/question_images/biomes_6568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What biome is located in the western part of India according to the map?" }, { "from": "gpt", "value": "The answer is Tropical Seasonal" } ], "clip_score": 30.203125, "loss": 2.373164415359497 }, { "id": "identity_272109", "image": "MapQA/images/map_8533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the monetary worth of South Dakota?" }, { "from": "gpt", "value": "The answer is 57.8%-58.5%" } ], "clip_score": 25.328125, "loss": 2.2560532093048096 }, { "id": "identity_233290", "image": "DVQA/images/bar_train_00165645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'axis' and 'joy' algorithms for the dataset labeled 'track'?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 35.875, "loss": 2.980069160461426 }, { "id": "identity_247305", "image": "IconQA/images/58355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows contain exactly 9 ovals?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 28.21875, "loss": 2.6994693279266357 }, { "id": "identity_151579", "image": "ScienceQA/images/19628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country borders the highlighted country to the west?" }, { "from": "gpt", "value": "The answer is Guatemala" } ], "clip_score": 23.71875, "loss": 2.950167179107666 }, { "id": "identity_266828", "image": "DVQA/images/bar_train_00153113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do fewer people prefer fuel in the walk category compared to tool in the tail category?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.375, "loss": 3.3945865631103516 }, { "id": "identity_1504", "image": "TabMWP/images/26346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the table. Then answer the question. At a price of $385, is there a shortage or a surplus?\nChoices:\n(A) shortage\n(B) surplus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.359375, "loss": 3.602273941040039 }, { "id": "identity_315327", "image": "MapQA/images/map_7710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value for Texas?" }, { "from": "gpt", "value": "The answer is 10.1%-10.7%" } ], "clip_score": 24.03125, "loss": 2.04415225982666 }, { "id": "identity_172079", "image": "TabMWP/images/31283.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the customers preferred a sandwich other than the listed options?" }, { "from": "gpt", "value": "The answer is 8.79%" } ], "clip_score": 38.4375, "loss": 2.118326187133789 }, { "id": "identity_51576", "image": "DocVQA/images/qffn0226_164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main topic of the DVD?" }, { "from": "gpt", "value": "The answer is High Stability and Low Wear" } ], "clip_score": 25.96875, "loss": 2.016761541366577 }, { "id": "identity_92163", "image": "IconQA/images/41339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides for all the shapes on the frame?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 31.96875, "loss": 2.000619649887085 }, { "id": "identity_306871", "image": "DocVQA/images/gnjh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is Preliminary Project Proposal" } ], "clip_score": 26.171875, "loss": 2.0285661220550537 }, { "id": "identity_124880", "image": "VizWiz/images/VizWiz_train_00006572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the can?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 31.796875, "loss": 3.2970352172851562 }, { "id": "identity_145370", "image": "DVQA/images/bar_train_00171697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories of preferred objects are there in the graph?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 32.1875, "loss": 2.444732904434204 }, { "id": "identity_215455", "image": "TextVQA/images/e46e0580978e4d4c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many keyboards are present in the room?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 13.9609375, "loss": 2.4724104404449463 }, { "id": "identity_249883", "image": "ScienceQA/images/1458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of these objects is both smooth and can hold liquid?" }, { "from": "gpt", "value": "The answer is ceramic mug and water pitcher" } ], "clip_score": 34.0, "loss": 1.7298451662063599 }, { "id": "identity_305373", "image": "IconQA/images/78944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this dotted line a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.625, "loss": 4.209887504577637 }, { "id": "identity_302345", "image": "DocVQA/images/lfdh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who prepared this report?" }, { "from": "gpt", "value": "The answer is dr. george e. bunce" } ], "clip_score": 22.109375, "loss": 3.244394063949585 }, { "id": "identity_261229", "image": "GeoQA+/images/2929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,DA切⊙O于点A,DO交⊙O于点C,连接BC,若∠D=40°,求∠CBO的大小。" }, { "from": "gpt", "value": "The answer is 45°" } ], "clip_score": 25.9375, "loss": 1.935742974281311 }, { "id": "identity_231891", "image": "DocVQA/images/llml0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address, including the street name, city, and state, mentioned in the given letter?" }, { "from": "gpt", "value": "The answer is 99 Wall Street, New York, N.Y" } ], "clip_score": 20.703125, "loss": 1.855667233467102 }, { "id": "identity_172512", "image": "TabMWP/images/305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ferry has the highest fare for cars?" }, { "from": "gpt", "value": "The answer is Fauntleroy-Vashon" } ], "clip_score": 38.5625, "loss": 1.5333949327468872 }, { "id": "identity_21609", "image": "A-OKVQA/images/509811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who owns those laptops?\nChoices:\n(A) one individual\n(B) library\n(C) multiple individuals\n(D) non profit" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.140625, "loss": 4.454164981842041 }, { "id": "identity_218495", "image": "A-OKVQA/images/115145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light fixtures are installed on the ceiling?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.0, "loss": 4.222470760345459 }, { "id": "identity_200589", "image": "PMC-VQA/images/PMC6475549_fig1_468927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the structure indicated by the black arrow likely to be?" }, { "from": "gpt", "value": "The answer is Bronchus" } ], "clip_score": 19.046875, "loss": 3.2589709758758545 }, { "id": "identity_188407", "image": "A-OKVQA/images/59278.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest?" }, { "from": "gpt", "value": "The answer is Dusk or evening" } ], "clip_score": 22.75, "loss": 3.109173059463501 }, { "id": "identity_64714", "image": "AI2D/images/3490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the organelle where photosynthesis occurs in the plant cell" }, { "from": "gpt", "value": "The answer is Chloroplast" } ], "clip_score": 31.875, "loss": 1.224678874015808 }, { "id": "identity_64307", "image": "ScienceQA/images/18589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is used to represent land on this map?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 33.21875, "loss": 3.0003552436828613 }, { "id": "identity_231474", "image": "VizWiz/images/VizWiz_train_00015286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tines does this fork have, and what is the total number of tines if you have three such forks?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 32.28125, "loss": 4.002331733703613 }, { "id": "identity_116558", "image": "TextVQA/images/2125f07537bfee03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many app icons are visible on the phone's screen?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 17.9375, "loss": 2.319620370864868 }, { "id": "identity_90893", "image": "Geometry3K/images/222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If P is the centroid of triangle ACE, and PF = 6, what is the length of PD?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 35.53125, "loss": 2.1685843467712402 }, { "id": "identity_137973", "image": "DVQA/images/bar_train_00152348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have a higher value for 'liquid' than for 'tip'?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.359375, "loss": 2.4791088104248047 }, { "id": "identity_293555", "image": "ChartQA/images/10303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the median value of mostly positive numbers greater than the average between the largest and smallest numbers?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 11.2890625, "loss": 3.3105950355529785 }, { "id": "identity_273019", "image": "PMC-VQA/images/PMC7870731_Fig16_363993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which anatomical plane is depicted in the image provided? \nOptions:\n(A) transverse plane \n(B) sagittal plane \n(C) coronal plane \n(D) oblique plane" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.96875, "loss": 4.543315410614014 }, { "id": "identity_159763", "image": "GeoQA+/images/4847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle DBG?" }, { "from": "gpt", "value": "The answer is 100 m²" } ], "clip_score": 34.53125, "loss": 2.1530332565307617 }, { "id": "identity_114002", "image": "VQA-AS/images/18263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the fawn's spots?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 23.21875, "loss": 3.93951678276062 }, { "id": "identity_326099", "image": "MapQA/images/map_11666.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have a higher value than Rhode Island?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.09375, "loss": 2.696458101272583 }, { "id": "identity_47881", "image": "FigureQA/images/70837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Royal Blue line ever surpass the Orange line?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.4375, "loss": 3.2559330463409424 }, { "id": "identity_8553", "image": "FigureQA/images/30737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green less than Dark Salmon?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 40.15625, "loss": 3.5948009490966797 }, { "id": "identity_82076", "image": "Geometry3K/images/1330.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the quadrilateral?" }, { "from": "gpt", "value": "The answer is 86" } ], "clip_score": 30.203125, "loss": 2.7814810276031494 }, { "id": "identity_202881", "image": "A-OKVQA/images/262985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the building in the background?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 10.703125, "loss": 3.651059627532959 }, { "id": "identity_322439", "image": "VQA2.0/images/268081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image labeled?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.625, "loss": 3.9582431316375732 }, { "id": "identity_242526", "image": "PlotQA/images/109323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total merchandise trade (exports + imports) for all countries combined, and what is the average merchandise trade per country?" }, { "from": "gpt", "value": "The answer is 27.8%" } ], "clip_score": 16.375, "loss": 3.0463244915008545 }, { "id": "identity_320925", "image": "DocVQA/images/hnfb0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which suite is mentioned?" }, { "from": "gpt", "value": "The answer is TERRACE SUITE" } ], "clip_score": 14.7890625, "loss": 3.353485584259033 }, { "id": "identity_204688", "image": "VQA-AS/images/10605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the person's shirt?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.515625, "loss": 3.404573917388916 }, { "id": "identity_76151", "image": "AI2D/images/4293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geological feature is formed above the subducting plate?" }, { "from": "gpt", "value": "The answer is Volcano" } ], "clip_score": 36.1875, "loss": 2.644017457962036 }, { "id": "identity_335687", "image": "DVQA/images/bar_train_00105203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have values less than 9?" }, { "from": "gpt", "value": "The answer is six" } ], "clip_score": 22.171875, "loss": 4.1973981857299805 }, { "id": "identity_295348", "image": "DocVQA/images/frld0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the goal or purpose?" }, { "from": "gpt", "value": "The answer is To increase awareness of Neurontin's efficacy in the treatment of diabetic neuropathy" } ], "clip_score": 22.75, "loss": 1.792533040046692 }, { "id": "identity_311738", "image": "FigureQA/images/99327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Blue greater than this?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.796875, "loss": 4.029090881347656 }, { "id": "identity_101161", "image": "IconQA/images/39055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each segment on the digital clock consumes 0.02 watts of power, how much power is being used by the number 5?" }, { "from": "gpt", "value": "The answer is 0.1 watts" } ], "clip_score": 32.53125, "loss": 1.3229076862335205 }, { "id": "identity_332021", "image": "A-OKVQA/images/99681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most important procedure in this environment?\nChoices:\n(A) lunch\n(B) schedule\n(C) timesheet\n(D) safety" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.2421875, "loss": 4.378972053527832 }, { "id": "identity_214462", "image": "A-OKVQA/images/539891.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible trash bins in the image?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.9375, "loss": 3.878845691680908 }, { "id": "identity_49293", "image": "DVQA/images/bar_train_00132234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the 'key' category across all series?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 20.078125, "loss": 2.3978188037872314 }, { "id": "identity_15513", "image": "IconQA/images/4997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are on the frame?\nChoices:\n(A) 9\n(B) 3\n(C) 6\n(D) 7\n(E) 5" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.5, "loss": 3.203296661376953 }, { "id": "identity_240648", "image": "FigureQA/images/10093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the lengths of the bars for Dark Olive and Chartreuse?" }, { "from": "gpt", "value": "The answer is 15 units" } ], "clip_score": 32.25, "loss": 2.210587978363037 }, { "id": "identity_258354", "image": "DocVQA/images/ffcn0226_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What positions did Andrew W. LaVigne hold before joining the American Seed Trade Association, and what were his responsibilities in those roles?" }, { "from": "gpt", "value": "The answer is Executive VP/CEO of Florida Citrus Mutual, President/Executive Director of FFAA, Legislative Director for Congressman Charles Canady, Agriculture Committee staffer for Congressman Tom Lewis, USDA staff for Secretary Ed Madigan" } ], "clip_score": 23.671875, "loss": 2.188049554824829 }, { "id": "identity_176817", "image": "TabMWP/images/15052.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the wind speed on Sunday was 7 miles per hour, what would be the new average wind speed for the week?" }, { "from": "gpt", "value": "The answer is 8.14 miles per hour" } ], "clip_score": 39.84375, "loss": 1.7493714094161987 }, { "id": "identity_192178", "image": "VQA2.0/images/334220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any condiments visible on the table?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.3125, "loss": 3.046334981918335 }, { "id": "identity_278284", "image": "VizWiz/images/VizWiz_train_00019623.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which button should I press to obtain zero code?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 16.546875, "loss": 3.699320077896118 }, { "id": "identity_91249", "image": "UniGeo/images/2786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and CD is perpendicular to AB at E, and the length of CD is 8, what is the radius of circle O?" }, { "from": "gpt", "value": "The answer is 4*sqrt(2)" } ], "clip_score": 37.15625, "loss": 1.7976435422897339 }, { "id": "identity_106344", "image": "Super-CLEVR/images/superCLEVR_new_000389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned between the green shiny motorbike and the purple object?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 34.78125, "loss": 4.936718940734863 }, { "id": "identity_220087", "image": "TextVQA/images/d603fc58b27471e4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage dispenser is visible in the right corner of the image?" }, { "from": "gpt", "value": "The answer is Soda" } ], "clip_score": 24.3125, "loss": 3.34148907661438 }, { "id": "identity_194076", "image": "TextVQA/images/78d8b7071cde5fbf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is the word \"rebajas\" from in the bottom middle image?" }, { "from": "gpt", "value": "The answer is Spanish" } ], "clip_score": 18.390625, "loss": 3.5064945220947266 }, { "id": "identity_231563", "image": "IconQA/images/59763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you cut the twig into 4 equal pieces, what is the length of each piece to the nearest inch?" }, { "from": "gpt", "value": "The answer is 1 inch" } ], "clip_score": 36.53125, "loss": 1.7161171436309814 }, { "id": "identity_315303", "image": "IconQA/images/32696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of this shape?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 28.8125, "loss": 2.223545551300049 }, { "id": "identity_16109", "image": "Geometry3K/images/62.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In \\odot F, A B \\cong B C, D F = 3 x - 7, and F E = x + 9. What is x?\nChoices:\n(A) 4\n(B) 7\n(C) 8\n(D) 9" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.375, "loss": 2.7277655601501465 }, { "id": "identity_93360", "image": "TabMWP/images/1054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of rotten tomatoes per barrel?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.9375, "loss": 2.7579267024993896 }, { "id": "identity_283747", "image": "AI2D/images/3708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that corresponds to the stomach:\n(A) E\n(B) D\n(C) F\n(D) C" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.125, "loss": 4.178948879241943 }, { "id": "identity_188161", "image": "PMC-VQA/images/PMC7713150_Fig3_314221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many 'Xf' labeled structures are there in the image?" }, { "from": "gpt", "value": "The answer is There are six 'Xf' labeled structures" } ], "clip_score": 32.46875, "loss": 2.044616222381592 }, { "id": "identity_106332", "image": "A-OKVQA/images/260182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the street scene located in an area where driving is on the right or left side?" }, { "from": "gpt", "value": "The answer is Right side driving" } ], "clip_score": 20.109375, "loss": 3.3489773273468018 }, { "id": "identity_144367", "image": "DocVQA/images/xfhb0228_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time does the Business Session start on October 3, 1963?" }, { "from": "gpt", "value": "The answer is 1:30 p.m" } ], "clip_score": 23.09375, "loss": 1.4555500745773315 }, { "id": "identity_113954", "image": "VQA-AS/images/2640.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects can be seen on the window sill?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 20.234375, "loss": 2.7318646907806396 }, { "id": "identity_252804", "image": "Super-CLEVR/images/superCLEVR_new_001684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects that are either big green things, large purple objects, or road bikes?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 27.578125, "loss": 2.1379177570343018 }, { "id": "identity_58964", "image": "FigureQA/images/15873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Gray section represent more than 25% of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 33.46875, "loss": 3.215142011642456 }, { "id": "identity_140496", "image": "MapQA/images/map_8662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the highest percentage of passengers according to the map?" }, { "from": "gpt", "value": "The answer is Texas with 53.8%-53.9%" } ], "clip_score": 35.9375, "loss": 2.278036117553711 }, { "id": "identity_283464", "image": "TabMWP/images/13379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.8125, "loss": 4.507072448730469 }, { "id": "identity_184113", "image": "PMC-VQA/images/PMC8364005_Fig2_34492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the structure indicated by the circle in the image part of the brain's ventricular system?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 28.734375, "loss": 3.265964984893799 }, { "id": "identity_114570", "image": "PMC-VQA/images/PMC9344258_Fig8_362016.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the yellow spheres in the image all of the same size and shape?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.03125, "loss": 3.2682487964630127 }, { "id": "identity_252237", "image": "PlotQA/images/111012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total net disbursements (concessional and nonconcessional) received by Middle East & North Africa (developing only) and South Asia, and which region received more in 1973?" }, { "from": "gpt", "value": "The answer is South Asia received more total net disbursements" } ], "clip_score": 27.796875, "loss": 1.729638695716858 }, { "id": "identity_32367", "image": "IconQA/images/35231.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does this shape have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.0, "loss": 2.0156636238098145 }, { "id": "identity_88478", "image": "GeoQA+/images/1032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,若∠BDC=31°,则∠BAD的度数是多少?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 29.109375, "loss": 1.7688896656036377 }, { "id": "identity_111320", "image": "TextVQA/images/d8227cdf6d2fb9c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of this port wine?" }, { "from": "gpt", "value": "The answer is KROHN" } ], "clip_score": 37.375, "loss": 2.7398576736450195 }, { "id": "identity_268453", "image": "DVQA/images/bar_train_00108380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is symbolized by the color sky blue?" }, { "from": "gpt", "value": "The answer is play" } ], "clip_score": 21.40625, "loss": 4.385534286499023 }, { "id": "identity_143315", "image": "DocVQA/images/qhjl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are listed on the right side of the document?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 23.75, "loss": 2.480797290802002 }, { "id": "identity_44382", "image": "DVQA/images/bar_train_00045210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars are there in the graph?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.625, "loss": 2.3527934551239014 }, { "id": "identity_63741", "image": "ScienceQA/images/3915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the shadow direction in the image suggest?" }, { "from": "gpt", "value": "The answer is Around noon" } ], "clip_score": 12.203125, "loss": 3.242072582244873 }, { "id": "identity_65456", "image": "ScienceQA/images/648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the grizzly bear in this food web?" }, { "from": "gpt", "value": "The answer is Top predator and consumer" } ], "clip_score": 34.3125, "loss": 3.244279146194458 }, { "id": "identity_114944", "image": "VQA2.0/images/517929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image appear to be taken?" }, { "from": "gpt", "value": "The answer is Nighttime" } ], "clip_score": 18.609375, "loss": 3.450632095336914 }, { "id": "identity_7991", "image": "ScienceQA/images/6995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which solution has a higher concentration of purple particles?\nChoices:\n(A) Solution A\n(B) Solution B\n(C) neither; their concentrations are the same" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.6875, "loss": 3.2853140830993652 }, { "id": "identity_310559", "image": "AI2D/images/3255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the central part of the nucleus?\nChoices:\n(A) nucleolus\n(B) smooth ER\n(C) none of the above\n(D) cytoplasm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.8125, "loss": 4.069258213043213 }, { "id": "identity_14958", "image": "ScienceQA/images/12124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these magnets attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 3.5722992420196533 }, { "id": "identity_54468", "image": "DVQA/images/bar_train_00138951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between the item with the highest sales and the item with the lowest sales?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.40625, "loss": 2.4293830394744873 }, { "id": "identity_208274", "image": "VQA2.0/images/338637.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest?" }, { "from": "gpt", "value": "The answer is Early morning or late afternoon" } ], "clip_score": 15.53125, "loss": 3.6431667804718018 }, { "id": "identity_266064", "image": "VQA-RAD/images/synpic38263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are you concerned about a physical injury or a medical procedure?" }, { "from": "gpt", "value": "The answer is Medical process" } ], "clip_score": 20.28125, "loss": 4.547689914703369 }, { "id": "identity_84627", "image": "GeoQA+/images/3687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 2 in the diagram?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 32.53125, "loss": 1.7896361351013184 }, { "id": "identity_307583", "image": "ChartQA/images/two_col_81806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When did this reach the highest point?" }, { "from": "gpt", "value": "The answer is FY 2017" } ], "clip_score": 19.953125, "loss": 1.7489135265350342 }, { "id": "identity_238199", "image": "VQA-RAD/images/synpic32727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hyperattenuations are located around the patient, and what anatomical structures do they correspond to?" }, { "from": "gpt", "value": "The answer is 12 ribs" } ], "clip_score": 21.953125, "loss": 4.094612121582031 }, { "id": "identity_324108", "image": "ScienceQA/images/11139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy moved from () to ().\nChoices:\n(A) each vial . . . the surroundings\n(B) the surroundings . . . each vial" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.609375, "loss": 3.737029552459717 }, { "id": "identity_84405", "image": "Geometry3K/images/1128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ZED?" }, { "from": "gpt", "value": "The answer is 22 degrees" } ], "clip_score": 31.140625, "loss": 2.7989768981933594 }, { "id": "identity_192795", "image": "Super-CLEVR/images/superCLEVR_new_001629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more two-wheeled or four-wheeled vehicles in the image?" }, { "from": "gpt", "value": "The answer is Four-wheeled" } ], "clip_score": 19.859375, "loss": 1.7900251150131226 }, { "id": "identity_323769", "image": "A-OKVQA/images/379987.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are these items placed in this?\nChoices:\n(A) protection\n(B) to cook\n(C) to eat\n(D) to dye" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.59375, "loss": 4.493357181549072 }, { "id": "identity_264554", "image": "Geometry3K/images/233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the measure of angle GH is 78 degrees, what is the measure of angle 2?\n(A) 39\n(B) 51\n(C) 78\n(D) 114" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.015625, "loss": 2.672653913497925 }, { "id": "identity_65793", "image": "ScienceQA/images/12598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is directly to the right of the Pond?" }, { "from": "gpt", "value": "The answer is Police Department" } ], "clip_score": 28.359375, "loss": 3.1270265579223633 }, { "id": "identity_84360", "image": "UniGeo/images/1449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AD is the diameter of circle O and angle ABC is 65 degrees, what is the measure of angle OBD?" }, { "from": "gpt", "value": "The answer is 25 degrees" } ], "clip_score": 32.78125, "loss": 2.0120182037353516 }, { "id": "identity_32057", "image": "A-OKVQA/images/63140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What cloth item hangs next to the man?\nChoices:\n(A) tablecloth\n(B) banner\n(C) curtain\n(D) poster" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 12.4609375, "loss": 4.224970817565918 }, { "id": "identity_86699", "image": "GeoQA+/images/4619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and angle ABC is 50 degrees, what is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 80 degrees" } ], "clip_score": 34.375, "loss": 2.032087802886963 }, { "id": "identity_106842", "image": "A-OKVQA/images/356286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the motorcycle in the center of the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 23.84375, "loss": 3.329803943634033 }, { "id": "identity_180014", "image": "TabMWP/images/4516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of turtleneck sweaters per drawer?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 37.875, "loss": 1.868786096572876 }, { "id": "identity_315499", "image": "TQA/images/question_images/human_system_reproductory_7038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What covers this?\nChoices:\n(A) bladder\n(B) penis\n(C) epididymis\n(D) scrotum" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.1875, "loss": 4.122010707855225 }, { "id": "identity_67705", "image": "TQA/images/abc_question_images/human_system_ear_16098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the auditory nerve?" }, { "from": "gpt", "value": "The answer is N" } ], "clip_score": 25.5, "loss": 2.714372396469116 }, { "id": "identity_277518", "image": "TabMWP/images/38361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on Erik's current amount of $160.50, can he afford to purchase both a beach umbrella and a picnic table?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.625, "loss": 3.2224361896514893 }, { "id": "identity_233863", "image": "TabMWP/images/25888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of ducks per pond in the city?" }, { "from": "gpt", "value": "The answer is 47.83" } ], "clip_score": 26.59375, "loss": 2.21667218208313 }, { "id": "identity_36027", "image": "TextVQA/images/b2f716a3390e71a9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is written on the sleeves of the players in white?" }, { "from": "gpt", "value": "The answer is lg dios" } ], "clip_score": 18.65625, "loss": 3.9672818183898926 }, { "id": "identity_206022", "image": "PMC-VQA/images/PMC7527384_Fig9_263871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the different colors used in the Doppler mapping?" }, { "from": "gpt", "value": "The answer is Direction and velocity of blood flow" } ], "clip_score": 27.078125, "loss": 3.4485509395599365 }, { "id": "identity_278700", "image": "A-OKVQA/images/64551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of biking event is taking place here?\nChoices:\n(A) hetrosexual\n(B) sit in\n(C) bake sale\n(D) gay" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.140625, "loss": 3.5871567726135254 }, { "id": "identity_266853", "image": "CLEVR-Math/images/CLEVR_train_008593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 4 big metal things, how many big metal things will remain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 23.25, "loss": 2.214557409286499 }, { "id": "identity_233271", "image": "GeoQA+/images/898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AD是△ABC的角平分线,DF⊥AB,垂足为F,DE=DG,△ADG和△AED的面积分别为50和25,求△BDF的面积。" }, { "from": "gpt", "value": "The answer is 12.5" } ], "clip_score": 28.203125, "loss": 1.4834834337234497 }, { "id": "identity_222955", "image": "PMC-VQA/images/PMC7823255_animals-11-00063-f002_348387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the colors red and blue in the Doppler ultrasound image?" }, { "from": "gpt", "value": "The answer is Blood flow direction relative to the probe" } ], "clip_score": 35.1875, "loss": 3.4784767627716064 }, { "id": "identity_336549", "image": "AI2D/images/799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on this food web, what would lead to an increase in the number of tadpoles?\nChoices:\n(A) more water plants\n(B) more fish\n(C) less water plants\n(D) more birds" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.28125, "loss": 3.4982352256774902 }, { "id": "identity_54843", "image": "DocVQA/images/jfkf0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source URL for the document?" }, { "from": "gpt", "value": "The answer is https://www.industrydocuments.ucsf.edu/docs/jfk0226" } ], "clip_score": 22.71875, "loss": 2.284226894378662 }, { "id": "identity_98052", "image": "IconQA/images/62919.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of one rectangle is 8 square units, what is the combined area of all three rectangles?" }, { "from": "gpt", "value": "The answer is 24 square units" } ], "clip_score": 23.6875, "loss": 1.3703577518463135 }, { "id": "identity_201287", "image": "VQA2.0/images/99004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 13.046875, "loss": 3.1227633953094482 }, { "id": "identity_120187", "image": "VQA2.0/images/470691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the countertop where the stove is placed?" }, { "from": "gpt", "value": "The answer is Stainless steel" } ], "clip_score": 28.609375, "loss": 2.61568021774292 }, { "id": "identity_15678", "image": "AI2D/images/4476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: which of the leaf is biggest one\nChoices:\n(A) a\n(B) b\n(C) c\n(D) d" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.59375, "loss": 3.8805038928985596 }, { "id": "identity_196637", "image": "VQA2.0/images/260034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any boats with two masts?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.484375, "loss": 3.2217535972595215 }, { "id": "identity_155078", "image": "ScienceQA/images/11702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate latitude of the northern border of the highlighted state?" }, { "from": "gpt", "value": "The answer is 39° N latitude" } ], "clip_score": 24.71875, "loss": 2.381962299346924 }, { "id": "identity_53034", "image": "PlotQA/images/140565.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the rating for Burundi improving over the years?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.90625, "loss": 2.983879566192627 }, { "id": "identity_86228", "image": "UniGeo/images/2072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point A is at (1,2), what is the length of line segment OA?" }, { "from": "gpt", "value": "The answer is √5" } ], "clip_score": 29.5, "loss": 2.0985541343688965 }, { "id": "identity_97360", "image": "IconQA/images/287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are circles?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.015625, "loss": 2.3601930141448975 }, { "id": "identity_179699", "image": "IconQA/images/60315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can this shape stand upright on its base?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.890625, "loss": 3.003474712371826 }, { "id": "identity_97570", "image": "IconQA/images/7647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you color one of the hearts red, what fraction of the hearts will be red?" }, { "from": "gpt", "value": "The answer is 1/6" } ], "clip_score": 25.625, "loss": 1.5097019672393799 }, { "id": "identity_281424", "image": "AI2D/images/884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter corresponds to the adult stage in the given choices: (A) D, (B) B, (C) C, or (D) A?" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.65625, "loss": 3.9175009727478027 }, { "id": "identity_114281", "image": "VQA-RAD/images/synpic33226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the diaphragm elevated on either side?" }, { "from": "gpt", "value": "The answer is Diaphragm is not elevated on either side" } ], "clip_score": 26.640625, "loss": 2.122249126434326 }, { "id": "identity_95573", "image": "IconQA/images/69308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the marbles are orange?" }, { "from": "gpt", "value": "The answer is 4/4 or 1" } ], "clip_score": 34.9375, "loss": 1.7486252784729004 }, { "id": "identity_210213", "image": "VizWiz/images/VizWiz_train_00000474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fruit is depicted on the cream packaging?" }, { "from": "gpt", "value": "The answer is Strawberries" } ], "clip_score": 26.703125, "loss": 2.3534164428710938 }, { "id": "identity_291467", "image": "VQA2.0/images/135920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what shape is the lamp located in the room?" }, { "from": "gpt", "value": "The answer is no lamp" } ], "clip_score": 24.34375, "loss": 4.110686302185059 }, { "id": "identity_188884", "image": "PMC-VQA/images/PMC7590732_Fig2_283374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What plane of the body are these images taken in?" }, { "from": "gpt", "value": "The answer is Axial plane" } ], "clip_score": 26.8125, "loss": 3.015333414077759 }, { "id": "identity_99393", "image": "CLEVR-Math/images/CLEVR_train_045019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are purple?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.3125, "loss": 2.5134057998657227 }, { "id": "identity_230542", "image": "DocVQA/images/qlml0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the recipient and the date of the letter?" }, { "from": "gpt", "value": "The answer is Cuban Sugar Year Book, P. O. Box 2549, Havana, Cuba, July 5th, 1945" } ], "clip_score": 29.75, "loss": 1.9210832118988037 }, { "id": "identity_55947", "image": "FigureQA/images/60915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by the Saddle Brown line?" }, { "from": "gpt", "value": "The answer is 46" } ], "clip_score": 26.265625, "loss": 2.865159273147583 }, { "id": "identity_166801", "image": "IconQA/images/70791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line were to be extended to the 5 cm mark, how much longer in centimeters would it need to be?" }, { "from": "gpt", "value": "The answer is 1 cm" } ], "clip_score": 31.0625, "loss": 2.2880632877349854 }, { "id": "identity_171078", "image": "IconQA/images/87377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many edges does the shape have?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 29.875, "loss": 1.9714059829711914 }, { "id": "identity_294089", "image": "TextVQA/images/9885ee997e817f37.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the numerical value displayed on the diabetic testing machine?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 18.078125, "loss": 3.9428725242614746 }, { "id": "identity_292404", "image": "GeoQA+/images/2025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB is the diameter of circle O, D is a point on circle O, and a tangent passing through point D intersects the extension of AB at point C. If ∠C = 30°, what is the degree measure of ∠CAD?\nChoices:\n(A) 60°\n(B) 45°\n(C) 50°\n(D) 30°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.9375, "loss": 2.673133134841919 }, { "id": "identity_108188", "image": "VQA2.0/images/570075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any structures on the beach for shade?" }, { "from": "gpt", "value": "The answer is Yes, a white tent-like structure" } ], "clip_score": 20.828125, "loss": 3.3536949157714844 }, { "id": "identity_326003", "image": "DocVQA/images/fpmj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When was the Agreement dated?" }, { "from": "gpt", "value": "The answer is 3/1/01" } ], "clip_score": 21.296875, "loss": 2.4754343032836914 }, { "id": "identity_335732", "image": "GeoQA+/images/4006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC≌△ADC,∠ABC=118°,∠DAC=40°,则∠BCD的度数是多少?\n选项:\n(A) 40°\n(B) 44°\n(C) 50°\n(D) 84°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 2.4443655014038086 }, { "id": "identity_7582", "image": "TQA/images/abc_question_images/muscle_fiber_17085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label refers to the bone?\nChoices:\n(A) M\n(B) X\n(C) E\n(D) A" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.71875, "loss": 4.044084548950195 }, { "id": "identity_150532", "image": "ScienceQA/images/3906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnetic polarity of the rightmost magnet in the image?" }, { "from": "gpt", "value": "The answer is South Pole" } ], "clip_score": 21.0625, "loss": 3.0050718784332275 }, { "id": "identity_327188", "image": "ScienceQA/images/19004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this period, thermal energy moved from () to ().\nChoices:\n(A) the surroundings . . . each vial\n(B) each vial . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.34375, "loss": 3.620514392852783 }, { "id": "identity_70058", "image": "AI2D/images/1523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which part of the atom is the \"Electron\" located according to the image?" }, { "from": "gpt", "value": "The answer is Electron cloud" } ], "clip_score": 33.75, "loss": 3.665290117263794 }, { "id": "identity_123357", "image": "VQA2.0/images/188562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a fence in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is a fence in the image" } ], "clip_score": 16.40625, "loss": 1.6877855062484741 }, { "id": "identity_61055", "image": "ChartQA/images/multi_col_60381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years did \"Relative poverty\" stay above 15%?" }, { "from": "gpt", "value": "The answer is 3 years" } ], "clip_score": 26.515625, "loss": 2.0179924964904785 }, { "id": "identity_192446", "image": "PMC-VQA/images/PMC3948506_Fig11_272836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the labeled structure indicated by the arrow pointing to GM?" }, { "from": "gpt", "value": "The answer is Gluteus Maximus" } ], "clip_score": 26.890625, "loss": 2.405810594558716 }, { "id": "identity_185464", "image": "PMC-VQA/images/PMC8039083_F2_415472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is shown in the bright-field images of mouse intestinal slices?" }, { "from": "gpt", "value": "The answer is Not applicable to the provided image" } ], "clip_score": 26.296875, "loss": 3.2253360748291016 }, { "id": "identity_70266", "image": "ScienceQA/images/2776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of environment is the grasshopper in?" }, { "from": "gpt", "value": "The answer is A grassy field" } ], "clip_score": 31.265625, "loss": 2.8861851692199707 }, { "id": "identity_90166", "image": "UniGeo/images/2676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of parallelogram ABCD is 100 cm², what is the area of triangle APD?" }, { "from": "gpt", "value": "The answer is 50 cm²" } ], "clip_score": 34.0625, "loss": 0.9614962339401245 }, { "id": "identity_283349", "image": "A-OKVQA/images/123555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action is prohibited by the sign next to the green light?\nChoices:\n(A) left turns\n(B) right turns\n(C) u-turns\n(D) going straight" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.171875, "loss": 4.136046409606934 }, { "id": "identity_142719", "image": "DocVQA/images/rtvw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the deadline for contacting Mr. Rhoa regarding the Clinical Markers for CVD Outcomes paper?" }, { "from": "gpt", "value": "The answer is 8/10/00" } ], "clip_score": 23.484375, "loss": 2.1540184020996094 }, { "id": "identity_129197", "image": "VizWiz/images/VizWiz_train_00010203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the shadow suggest it is?" }, { "from": "gpt", "value": "The answer is Midday" } ], "clip_score": 17.0625, "loss": 3.1361052989959717 }, { "id": "identity_140482", "image": "DocVQA/images/fyww0217_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purchase order number for this project?" }, { "from": "gpt", "value": "The answer is 3993" } ], "clip_score": 19.75, "loss": 2.685394525527954 }, { "id": "identity_216492", "image": "A-OKVQA/images/551686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pieces of luggage are visible on the conveyor belt?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 31.109375, "loss": 2.0284953117370605 }, { "id": "identity_224767", "image": "Geometry3K/images/798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of x and then determine the perimeter of triangle PQT" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 31.0625, "loss": 3.1160812377929688 }, { "id": "identity_91722", "image": "IconQA/images/23861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of eyes per fish?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.4375, "loss": 1.859292984008789 }, { "id": "identity_267934", "image": "Super-CLEVR/images/superCLEVR_new_000840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the gray rubber object located to the right of the mountain bike and the small cyan metallic motorbike of equal size?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.015625, "loss": 5.613287925720215 }, { "id": "identity_171728", "image": "IconQA/images/39871.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many triangles are there in the shape?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.171875, "loss": 2.541299343109131 }, { "id": "identity_20780", "image": "TextVQA/images/e6795fbd80367fb7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what kind of boat is this?" }, { "from": "gpt", "value": "The answer is thunder" } ], "clip_score": 24.3125, "loss": 3.510139226913452 }, { "id": "identity_332208", "image": "PMC-VQA/images/PMC7562879_ccr33064-fig-0001_274937.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the two parts of this represent?\nChoices:\n(A) Fat in the bowel\n(B) Gas-filled bowel segments\n(C) Veins in the bowel wall\n(D) Folds in the bowel wall" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.359375, "loss": 4.152746200561523 }, { "id": "identity_243555", "image": "GeoQA+/images/5216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In parallelogram ABCD, AE:EB=1:2, S~△AEF~=6, find the area of parallelogram ABCD" }, { "from": "gpt", "value": "The answer is 108" } ], "clip_score": 36.0, "loss": 2.6116268634796143 }, { "id": "identity_180517", "image": "IconQA/images/72509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select a marble without looking, what is the probability of picking a pink one?" }, { "from": "gpt", "value": "The answer is 1 or 100%" } ], "clip_score": 29.703125, "loss": 1.581107497215271 }, { "id": "identity_136587", "image": "DVQA/images/bar_train_00048467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item 'whisky' across all stores?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 24.640625, "loss": 2.5510029792785645 }, { "id": "identity_192207", "image": "A-OKVQA/images/281725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person in the foreground doing?" }, { "from": "gpt", "value": "The answer is Snowboarding" } ], "clip_score": 28.46875, "loss": 3.03859281539917 }, { "id": "identity_233191", "image": "VQA-RAD/images/synpic38630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the possible complications associated with an abscess in the abdominal cavity as seen in the CT scan?" }, { "from": "gpt", "value": "The answer is sepsis, rupture, spread of infection" } ], "clip_score": 31.390625, "loss": 2.5014004707336426 }, { "id": "identity_314730", "image": "UniGeo/images/2743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点A、B、C在圆O上,∠OBC=50°,则∠A的度数是多少?\n选项:\n(A) 20°\n(B) 40°\n(C) 50°\n(D) 100°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.1875, "loss": 2.6048216819763184 }, { "id": "identity_148485", "image": "DocVQA/images/lgpg0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weight range for women with a height of 70 inches?" }, { "from": "gpt", "value": "The answer is 136 - 166" } ], "clip_score": 23.90625, "loss": 2.0966596603393555 }, { "id": "identity_195726", "image": "TextVQA/images/884092fb1539d8c6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the packaging for the toy labeled \"Seebär\"?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 18.765625, "loss": 2.8760688304901123 }, { "id": "identity_301060", "image": "PlotQA/images/153966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the net enrollment rate in primary schools in one country to that in another country?" }, { "from": "gpt", "value": "The answer is 0.98" } ], "clip_score": 32.5, "loss": 1.9641129970550537 }, { "id": "identity_38100", "image": "PMC-VQA/images/PMC6712706_Fig2_29631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the yellow lines represent in the radiograph? \nChoices:\n(A) PDPAA \n(B) HVA \n(C) IMA \n(D) DMAA " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.96875, "loss": 4.147072792053223 }, { "id": "identity_230334", "image": "PMC-VQA/images/PMC5980103_Fig3_322321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure indicated by the arrowhead and describe its role in tooth development" }, { "from": "gpt", "value": "The answer is Epithelial pearls, involved in enamel organ formation" } ], "clip_score": 27.546875, "loss": 3.0994679927825928 }, { "id": "identity_323740", "image": "TextVQA/images/00ec8fa225899817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many awards has this received?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 16.875, "loss": 1.984838843345642 }, { "id": "identity_255912", "image": "UniGeo/images/333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O的半径为2,点A为⊙O上一点,半径OD⊥弦BC于D,如果∠BAC=60°,求弦BC的长度。" }, { "from": "gpt", "value": "The answer is \\( 2\\sqrt{3} \\)" } ], "clip_score": 27.375, "loss": 2.095973491668701 }, { "id": "identity_170657", "image": "IconQA/images/58378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the time is 2:15 PM now, what will be the time after 45 minutes?" }, { "from": "gpt", "value": "The answer is 3:15 PM" } ], "clip_score": 27.59375, "loss": 1.5923726558685303 }, { "id": "identity_205910", "image": "PMC-VQA/images/PMC7500744_FIG1_256789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the green annotation in the image?" }, { "from": "gpt", "value": "The answer is Irregular oval" } ], "clip_score": 26.421875, "loss": 4.11970329284668 }, { "id": "identity_85687", "image": "Geometry3K/images/33.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB = 2x + 3 and BC = 5x, what is the length of diagonal AC in the rhombus ABCD?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 38.25, "loss": 2.772571325302124 }, { "id": "identity_210068", "image": "VQA2.0/images/242203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit is displayed on the magnet on the refrigerator?" }, { "from": "gpt", "value": "The answer is Cherries" } ], "clip_score": 18.59375, "loss": 3.380650758743286 }, { "id": "identity_190875", "image": "Super-CLEVR/images/superCLEVR_new_001578.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the vehicle in the middle?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 26.453125, "loss": 2.7095489501953125 }, { "id": "identity_191565", "image": "PMC-VQA/images/PMC8063182_fig0001_423112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any labeling on the MRI image that indicates the use of a contrast agent?" }, { "from": "gpt", "value": "The answer is Yes, labeled as \"gel" } ], "clip_score": 32.0, "loss": 3.1918632984161377 }, { "id": "identity_62591", "image": "DocVQA/images/jmfb0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What city is the University of Utah located in?" }, { "from": "gpt", "value": "The answer is Salt Lake City" } ], "clip_score": 19.140625, "loss": 2.3527538776397705 }, { "id": "identity_7506", "image": "AI2D/images/3203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What interacts with symbiotic microbes?\nChoices:\n(A) none of the above\n(B) stem\n(C) leaf\n(D) roots" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.453125, "loss": 3.9501335620880127 }, { "id": "identity_281174", "image": "VQA2.0/images/348790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is it currently?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 14.4296875, "loss": 3.499235153198242 }, { "id": "identity_301176", "image": "UniGeo/images/1119.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD是⊙O的内接四边形,若∠BOD=88°,则∠BCD的度数是()\n选项:\n(A) 88°\n(B) 92°\n(C) 106°\n(D) 136°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.1875, "loss": 2.6388261318206787 }, { "id": "identity_39931", "image": "FigureQA/images/88053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Turquoise line ever intersect with the Brown line?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.96875, "loss": 2.8722262382507324 }, { "id": "identity_293049", "image": "PMC-VQA/images/PMC4986164_DEV129973F2_63141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the SEM image?\nChoices:\n(A) Seedlings post chemical treatment.\n(B) Seedlings displaying enhanced phyllotactic patterns.\n(C) Seedlings exhibiting reduced phyllotactic patterns.\n(D) Seedlings with modified meristem size." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.015625, "loss": 4.04111909866333 }, { "id": "identity_213114", "image": "VizWiz/images/VizWiz_train_00010803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible logo or brand on the lunchbox?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.484375, "loss": 2.715864658355713 }, { "id": "identity_101528", "image": "CLEVR-Math/images/CLEVR_train_048601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cubes?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.75, "loss": 2.8852596282958984 }, { "id": "identity_123557", "image": "PMC-VQA/images/PMC5563326_F3_205176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any cortical sulci be identified in this image?" }, { "from": "gpt", "value": "The answer is Yes, cortical sulci can be identified in the image" } ], "clip_score": 36.59375, "loss": 1.4514561891555786 }, { "id": "identity_41383", "image": "MapQA/images/map_861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the map include data for Alaska and Hawaii?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.828125, "loss": 3.0414247512817383 }, { "id": "identity_164455", "image": "GeoQA+/images/3816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠OBD in the diagram?" }, { "from": "gpt", "value": "The answer is 126°" } ], "clip_score": 28.109375, "loss": 2.456071376800537 }, { "id": "identity_106000", "image": "VQA2.0/images/471375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person in the foreground doing?" }, { "from": "gpt", "value": "The answer is Sitting" } ], "clip_score": 15.9921875, "loss": 2.7796404361724854 }, { "id": "identity_13363", "image": "VizWiz/images/VizWiz_train_00006424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is sweetarts candy tabs" } ], "clip_score": 28.453125, "loss": 4.310611248016357 }, { "id": "identity_273768", "image": "GeoQA+/images/1343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, C, and D are on circle O. OA is perpendicular to BC with E as the foot of the perpendicular. If ∠ADC = 35°, what is the measure of ∠OBC? \nChoices:\n(A) 15°\n(B) 20°\n(C) 30°\n(D) 35°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.78125, "loss": 2.5897254943847656 }, { "id": "identity_133741", "image": "A-OKVQA/images/335065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting around the table?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 17.984375, "loss": 2.123394250869751 }, { "id": "identity_267760", "image": "VizWiz/images/VizWiz_train_00007868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cost of it?" }, { "from": "gpt", "value": "The answer is $1.05" } ], "clip_score": 22.484375, "loss": 1.6995397806167603 }, { "id": "identity_241083", "image": "ScienceQA/images/19008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential impacts on the drone's flight performance if Brooke's test shows that adding the blade guards made the drone fly poorly, and how could these impacts be measured?" }, { "from": "gpt", "value": "The answer is Increased weight, altered aerodynamics, and higher power consumption" } ], "clip_score": 23.515625, "loss": 2.450387954711914 }, { "id": "identity_271901", "image": "TQA/images/question_images/parts_worm_7296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following anatomical structures connects the mouth to the intestine? \n(A) Esophagus\n(B) Crop\n(C) Pumping vessels\n(D) Pharynx" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.25, "loss": 3.7110984325408936 }, { "id": "identity_34723", "image": "GeoQA+/images/5170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,CD是斜边AB上的中线,∠A=35°,则∠BCD=()\nChoices:\n(A) 35°\n(B) 45°\n(C) 60°\n(D) 55°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.625, "loss": 2.7848458290100098 }, { "id": "identity_316617", "image": "AI2D/images/1492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What comes after evaporation?\nChoices:\n(A) none of the above\n(B) infiltration\n(C) condensation\n(D) percolation" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.15625, "loss": 4.144606590270996 }, { "id": "identity_155274", "image": "ScienceQA/images/7464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of simple machine is represented by the slide?" }, { "from": "gpt", "value": "The answer is Inclined plane" } ], "clip_score": 23.203125, "loss": 2.1908113956451416 }, { "id": "identity_293843", "image": "DVQA/images/bar_train_00105380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar is labeled at the bottom of the first bar?" }, { "from": "gpt", "value": "The answer is score" } ], "clip_score": 20.453125, "loss": 3.4864723682403564 }, { "id": "identity_272143", "image": "MapQA/images/map_3573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which states bordering Rhode Island have the lowest value?" }, { "from": "gpt", "value": "The answer is Massachusetts" } ], "clip_score": 25.46875, "loss": 2.476609468460083 }, { "id": "identity_3159", "image": "ScienceQA/images/3269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these magnets attract or repel each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.578125, "loss": 3.4944205284118652 }, { "id": "identity_27185", "image": "VQA2.0/images/193077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many power lines are there?" }, { "from": "gpt", "value": "The answer is too many to count" } ], "clip_score": 15.140625, "loss": 2.884291648864746 }, { "id": "identity_30698", "image": "DocVQA/images/rhmg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the auth. no. mentioned in the given form ?" }, { "from": "gpt", "value": "The answer is 0500" } ], "clip_score": 19.34375, "loss": 2.5995187759399414 }, { "id": "identity_81324", "image": "Geometry3K/images/1725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of triangle KLM is 154 square units, what is the height of the triangle from vertex L to base KM?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 33.96875, "loss": 2.1869232654571533 }, { "id": "identity_288052", "image": "A-OKVQA/images/351422.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of awnings present? \nChoices:\n(A) eight\n(B) three\n(C) none\n(D) four" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.2421875, "loss": 3.602461338043213 }, { "id": "identity_30826", "image": "TabMWP/images/10200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Kendra's classmates revealed how many science articles they read. What is the mode of the numbers?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.28125, "loss": 2.2649848461151123 }, { "id": "identity_22101", "image": "TQA/images/question_images/tides_2602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many kinds of tides are shown?\nChoices:\n(A) 1\n(B) 5\n(C) 3\n(D) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.75, "loss": 2.9700636863708496 }, { "id": "identity_157040", "image": "ScienceQA/images/9901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state borders Florida to the west?" }, { "from": "gpt", "value": "The answer is Alabama" } ], "clip_score": 29.265625, "loss": 2.5497257709503174 }, { "id": "identity_17710", "image": "IconQA/images/71039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of hearts even or odd?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.65625, "loss": 4.596458911895752 }, { "id": "identity_278478", "image": "TabMWP/images/13635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The German club created a table displaying the ages of its members. What is the most frequently occurring age in the data set?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.21875, "loss": 1.759364366531372 }, { "id": "identity_123812", "image": "PMC-VQA/images/PMC7271094_Fig1_184019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle measurement indicated at the bottom left of the image?" }, { "from": "gpt", "value": "The answer is 5°" } ], "clip_score": 15.2109375, "loss": 2.685220241546631 }, { "id": "identity_207927", "image": "A-OKVQA/images/304295.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flowers are predominantly displayed in the window of the shop?" }, { "from": "gpt", "value": "The answer is Sunflowers" } ], "clip_score": 19.671875, "loss": 2.6342787742614746 }, { "id": "identity_83461", "image": "Geometry3K/images/404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle JKL?" }, { "from": "gpt", "value": "The answer is 27.5" } ], "clip_score": 37.15625, "loss": 1.594036340713501 }, { "id": "identity_106921", "image": "VQA-AS/images/11190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the soda can on the table?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.8515625, "loss": 3.1726512908935547 }, { "id": "identity_302184", "image": "VQA-AS/images/17628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person chasing the squirrels away?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 26.53125, "loss": 3.2424728870391846 }, { "id": "identity_277018", "image": "UniGeo/images/3142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, in rhombus paper ABCD, ∠A = 60°. Folding rhombus paper ABCD so that point C falls on the line containing DP (P is the midpoint of AB), results in a crease passing through point D, forming DE. What is the measure of ∠ADP?\n(A) 20°\n(B) 30°\n(C) 40°\n(D) 45°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.6875, "loss": 2.2993648052215576 }, { "id": "identity_126471", "image": "A-OKVQA/images/245182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of snow sport is depicted in the image?" }, { "from": "gpt", "value": "The answer is Cross-country skiing" } ], "clip_score": 31.421875, "loss": 2.1804039478302 }, { "id": "identity_298671", "image": "UniGeo/images/494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the radius of the base of a cone is r=3 and the height is h=4, what is the lateral surface area of the cone?\nChoices:\n(A) 12π\n(B) 15π\n(C) 24π\n(D) 30π" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.0, "loss": 2.9099280834198 }, { "id": "identity_100517", "image": "IconQA/images/52208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the shapes?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 35.21875, "loss": 2.976525068283081 }, { "id": "identity_262687", "image": "VQA2.0/images/326232.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animal is in your sight?" }, { "from": "gpt", "value": "The answer is horse" } ], "clip_score": 18.5625, "loss": 2.8887763023376465 }, { "id": "identity_319339", "image": "A-OKVQA/images/374818.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How might people move from the aircraft to the terminal?\nChoices:\n(A) taxi\n(B) stairs\n(C) gangway\n(D) truck" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.9375, "loss": 3.6575894355773926 }, { "id": "identity_295515", "image": "DocVQA/images/zrlp0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What duty is scheduled for April 28th?" }, { "from": "gpt", "value": "The answer is swanson center" } ], "clip_score": 20.640625, "loss": 4.480442047119141 }, { "id": "identity_273587", "image": "AI2D/images/2006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Considering the diagram provided, which organisms would be most impacted by a reduction in the quantity of grass?\nOptions:\n(A) Puma\n(B) seeds\n(C) Grasshopper\n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.359375, "loss": 3.2013633251190186 }, { "id": "identity_50665", "image": "MapQA/images/map_7259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have more than 400 holes?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.4375, "loss": 2.1474454402923584 }, { "id": "identity_65440", "image": "TQA/images/question_images/optics_lense_types_9160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the lens that has one flat side and one outward curving side?" }, { "from": "gpt", "value": "The answer is Plano - Convex Lens" } ], "clip_score": 34.03125, "loss": 2.4204447269439697 }, { "id": "identity_165690", "image": "Geometry3K/images/1863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of the two smallest angles in the given circle diagram?" }, { "from": "gpt", "value": "The answer is 225°" } ], "clip_score": 33.84375, "loss": 2.0341145992279053 }, { "id": "identity_312231", "image": "VQA-RAD/images/synpic47020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the area hyperintense or hypointense?" }, { "from": "gpt", "value": "The answer is Hyperintense" } ], "clip_score": 19.875, "loss": 2.5863406658172607 }, { "id": "identity_238260", "image": "VizWiz/images/VizWiz_train_00014032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the expiration date and the manufacturing date of the product?" }, { "from": "gpt", "value": "The answer is Expiration date and manufacturing date in MM/YYYY format" } ], "clip_score": 25.28125, "loss": 2.4407424926757812 }, { "id": "identity_317885", "image": "TextVQA/images/eb34a38e10f395b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on the sign beneath the one way sign on the pole in the center?" }, { "from": "gpt", "value": "The answer is no turn" } ], "clip_score": 19.546875, "loss": 3.8745200634002686 }, { "id": "identity_275957", "image": "DVQA/images/bar_train_00168414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of feed in rhythm less than the value of day in count?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.71875, "loss": 3.8680496215820312 }, { "id": "identity_223458", "image": "VizWiz/images/VizWiz_train_00004054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the iPod in the image?" }, { "from": "gpt", "value": "The answer is Approximately 4.1 inches tall" } ], "clip_score": 31.109375, "loss": 2.611522674560547 }, { "id": "identity_311553", "image": "Super-CLEVR/images/superCLEVR_new_001375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a matte dirtbike that is the same size as the gray object to the left of the blue metal bicycle?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.109375, "loss": 5.167848587036133 }, { "id": "identity_86943", "image": "Geometry3K/images/1401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of segment GH" }, { "from": "gpt", "value": "The answer is 12.3125" } ], "clip_score": 27.828125, "loss": 2.3877148628234863 }, { "id": "identity_205509", "image": "TextVQA/images/8774aec90ba585df.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage is shown in the glass?" }, { "from": "gpt", "value": "The answer is Stout beer" } ], "clip_score": 27.3125, "loss": 2.6825618743896484 }, { "id": "identity_278027", "image": "DocVQA/images/szbj0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the University of California located?" }, { "from": "gpt", "value": "The answer is davis, california 95616" } ], "clip_score": 22.640625, "loss": 1.694062352180481 }, { "id": "identity_129726", "image": "VQA-RAD/images/synpic18461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of cardiomegaly in this image?" }, { "from": "gpt", "value": "The answer is No signs of cardiomegaly" } ], "clip_score": 28.8125, "loss": 2.5412397384643555 }, { "id": "identity_147219", "image": "DVQA/images/bar_train_00126110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the 'temple' and 'image' categories in the 'left' bar?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 20.1875, "loss": 2.566903829574585 }, { "id": "identity_337727", "image": "PMC-VQA/images/PMC5991000_Fig2_325329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used to visualize this lesion?\nChoices:\n(A) Magnetic Resonance Imaging (MRI)\n(B) Computed Tomography (CT)\n(C) X-rays\n(D) Positron Emission Tomography (PET)" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.671875, "loss": 4.064126014709473 }, { "id": "identity_267816", "image": "GeoQA+/images/566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If a person stands about 30 meters away from a utility pole with a small ruler marked in centimeters, extends their arm forward with the ruler held upright, and sees that the ruler covers the utility pole perfectly with about 12 markings (12 centimeters) visible on the ruler, and knowing that the person's arm length is approximately 60 centimeters, what is the approximate height of the utility pole?\nChoices:\n(A) 12 meters\n(B) 9 meters\n(C) 6 meters\n(D) 4 meters" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.53125, "loss": 2.885291814804077 }, { "id": "identity_64623", "image": "AI2D/images/324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this food web?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 30.15625, "loss": 4.207503318786621 }, { "id": "identity_252509", "image": "ScienceQA/images/12879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the ocean highlighted in the image and list three countries that have coastlines along this ocean" }, { "from": "gpt", "value": "The answer is Australia, South Africa, Argentina" } ], "clip_score": 33.96875, "loss": 3.189673662185669 }, { "id": "identity_194886", "image": "PMC-VQA/images/PMC8120838_Fig4_440244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of pathology in the image?" }, { "from": "gpt", "value": "The answer is No evidence of pathology visible" } ], "clip_score": 26.734375, "loss": 3.4636223316192627 }, { "id": "identity_119432", "image": "Super-CLEVR/images/superCLEVR_new_001207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Cyan" } ], "clip_score": 24.28125, "loss": 3.612311363220215 }, { "id": "identity_33956", "image": "VQA2.0/images/424907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are some of these people desiring to make use of public transportation?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 28.53125, "loss": 3.1499125957489014 }, { "id": "identity_188916", "image": "PMC-VQA/images/PMC3943034_F1_271738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the ventricles of the brain be identified in this image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.0, "loss": 2.873011827468872 }, { "id": "identity_4799", "image": "IconQA/images/66890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What shape is this?\nChoices:\n(A) cone\n(B) cube\n(C) sphere" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.953125, "loss": 4.14965295791626 }, { "id": "identity_314949", "image": "FigureQA/images/93775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cornflower's value the lowest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.09375, "loss": 4.132420063018799 }, { "id": "identity_309097", "image": "GeoQA+/images/2718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AD是⊙O的切线,切点为A,AC是⊙O的直径,CD交⊙O于点B,连接OB,若⁀{AB}的度数为70°,则∠D的大小为多少?\n选项:\n(A) 70°\n(B) 60°\n(C) 55°\n(D) 35°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.25, "loss": 2.5900442600250244 }, { "id": "identity_287194", "image": "A-OKVQA/images/90490.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which retail store does the display belong to?\nChoices:\n(A) Target\n(B) Sears\n(C) Walmart\n(D) Best Buy" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.078125, "loss": 3.7512869834899902 }, { "id": "identity_259766", "image": "DocVQA/images/mmhw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hours and minutes are scheduled for meetings and visits on Monday, July 21, excluding lunch and dinner?" }, { "from": "gpt", "value": "The answer is 3 hours and 39 minutes" } ], "clip_score": 20.71875, "loss": 1.688626766204834 }, { "id": "identity_255431", "image": "PMC-VQA/images/PMC5907520_fig1_303828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key anatomical structures visible in this MRI image, and how can you differentiate between them based on their appearance?" }, { "from": "gpt", "value": "The answer is Bones, muscles, fat, and organs" } ], "clip_score": 31.875, "loss": 2.8470675945281982 }, { "id": "identity_266447", "image": "FigureQA/images/87926.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Sandy Brown and Yellow intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.640625, "loss": 3.521411180496216 }, { "id": "identity_254758", "image": "TabMWP/images/4160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average rate of change in the time Andy spent on homework from Sunday to Thursday?" }, { "from": "gpt", "value": "The answer is -0.5 minutes per day" } ], "clip_score": 35.75, "loss": 1.8350825309753418 }, { "id": "identity_266640", "image": "FigureQA/images/89323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green Yellow the toughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.75, "loss": 3.8037803173065186 }, { "id": "identity_52036", "image": "FigureQA/images/51635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 35.78125, "loss": 3.5337817668914795 }, { "id": "identity_102001", "image": "TabMWP/images/24189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days did Emmy drive more than 15 miles?" }, { "from": "gpt", "value": "The answer is 4 days" } ], "clip_score": 30.34375, "loss": 2.2847542762756348 }, { "id": "identity_199752", "image": "VQA2.0/images/104711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any helmets visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.9609375, "loss": 3.13061785697937 }, { "id": "identity_82991", "image": "GeoQA+/images/5414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the tree's fallen part from the breaking point to the top?" }, { "from": "gpt", "value": "The answer is 8 meters" } ], "clip_score": 28.203125, "loss": 3.046952724456787 }, { "id": "identity_294094", "image": "A-OKVQA/images/213687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which types of bird meats are available for sale in this large building, along with other items?\nChoices:\n(A) chicken\n(B) dove\n(C) duck\n(D) pigeon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.203125, "loss": 3.843595504760742 }, { "id": "identity_302896", "image": "Super-CLEVR/images/superCLEVR_new_001683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the green object that has the same shape as the red object?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 24.984375, "loss": 3.317988634109497 }, { "id": "identity_207310", "image": "PMC-VQA/images/PMC5069639_cne23988-fig-0003_85263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is used to highlight the sections in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.703125, "loss": 4.399506568908691 }, { "id": "identity_300735", "image": "FigureQA/images/87018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this have the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.84375, "loss": 4.083634853363037 }, { "id": "identity_102196", "image": "TabMWP/images/9571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in cost between the most expensive and the least expensive item on the menu?" }, { "from": "gpt", "value": "The answer is $8.29" } ], "clip_score": 24.640625, "loss": 2.3136963844299316 }, { "id": "identity_250019", "image": "FigureQA/images/9671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the values of Navy Blue, Lime Green, and Dark Magenta?" }, { "from": "gpt", "value": "The answer is 120" } ], "clip_score": 25.859375, "loss": 2.0480287075042725 }, { "id": "identity_172926", "image": "IconQA/images/88327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple paw prints are there in total?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.75, "loss": 2.004833459854126 }, { "id": "identity_85639", "image": "UniGeo/images/2909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AD = DO and ∠BAC = 22°, find the measure of ∠DFO" }, { "from": "gpt", "value": "The answer is 11°" } ], "clip_score": 30.90625, "loss": 2.2101831436157227 }, { "id": "identity_163819", "image": "GeoQA+/images/4789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAE in the given diagram?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 33.375, "loss": 1.6511892080307007 }, { "id": "identity_329655", "image": "UniGeo/images/2637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在▱ABCD中,∠ABC和∠BCD的平分线交边AD于点E,且BE=12,CE=5,则点A到BC的距离是多少?\nChoices:\n(A) \\frac{12}{5}\n(B) 4\n(C) \\frac{60}{13}\n(D) \\frac{60}{7}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.84375, "loss": 2.393212080001831 }, { "id": "identity_258191", "image": "A-OKVQA/images/68594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What evidence in the image suggests that the man is making a purchase at a vegetable market?" }, { "from": "gpt", "value": "The answer is Evidence of a vegetable market setting and plastic bag" } ], "clip_score": 32.15625, "loss": 3.2440080642700195 }, { "id": "identity_199025", "image": "PMC-VQA/images/PMC6236657_fig1_396440.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the brain in the image?" }, { "from": "gpt", "value": "The answer is Axial (cross-sectional from above)" } ], "clip_score": 31.796875, "loss": 3.058382511138916 }, { "id": "identity_248478", "image": "DocVQA/images/slyc0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number for the table that discusses \"Nutrient Intakes, Military Dependents, Burma, per Standard Man per Day, Recipe Method\"?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 17.78125, "loss": 2.3960306644439697 }, { "id": "identity_101409", "image": "CLEVR-Math/images/CLEVR_train_016262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cylinders?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.90625, "loss": 2.6076738834381104 }, { "id": "identity_78436", "image": "ScienceQA/images/17348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of forest biome is depicted in the image?" }, { "from": "gpt", "value": "The answer is Temperate deciduous forest" } ], "clip_score": 25.71875, "loss": 2.3290302753448486 }, { "id": "identity_45524", "image": "DVQA/images/bar_train_00095895.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the category with the highest combined value of all items?" }, { "from": "gpt", "value": "The answer is teeth" } ], "clip_score": 20.984375, "loss": 3.6237189769744873 }, { "id": "identity_90116", "image": "GeoQA+/images/2777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given image, if ∠GBC = 50° and AO⊥CD at E, what is the measure of ∠AED?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 33.78125, "loss": 1.6197205781936646 }, { "id": "identity_312890", "image": "DVQA/images/bar_train_00132217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item is the least favored?" }, { "from": "gpt", "value": "The answer is ease" } ], "clip_score": 23.390625, "loss": 3.9131312370300293 }, { "id": "identity_52073", "image": "FigureQA/images/70575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value for Medium Turquoise?" }, { "from": "gpt", "value": "The answer is 78" } ], "clip_score": 22.4375, "loss": 2.773388624191284 }, { "id": "identity_15803", "image": "PlotQA/images/105112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between two consecutive major ticks on the Y-axis ?" }, { "from": "gpt", "value": "The answer is 5.0" } ], "clip_score": 23.203125, "loss": 2.4551546573638916 }, { "id": "identity_6844", "image": "TabMWP/images/26955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Diego just downloaded the new game Gem Excavator on his phone. In the first level, Diego gains points for each green gem he finds. However, he loses points for each red gem he finds. The table shows how the gems affect Diego's points. Which color gem affects Diego's points less?\nChoices:\n(A) green\n(B) red" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.515625, "loss": 3.663003921508789 }, { "id": "identity_148756", "image": "DocVQA/images/kgcx0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time does the Luncheon start?" }, { "from": "gpt", "value": "The answer is 12:45" } ], "clip_score": 15.15625, "loss": 1.741381287574768 }, { "id": "identity_220191", "image": "A-OKVQA/images/546378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the website address on the object near the cat?" }, { "from": "gpt", "value": "The answer is www.qcsb.com" } ], "clip_score": 23.203125, "loss": 2.8022170066833496 }, { "id": "identity_211286", "image": "VQA2.0/images/146151.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bikes have visible license plates?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.0, "loss": 2.6244637966156006 }, { "id": "identity_200981", "image": "PMC-VQA/images/PMC7071674_Fig4_133768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any areas of the image that appear brighter than the rest?" }, { "from": "gpt", "value": "The answer is Yes, there are bright spots" } ], "clip_score": 21.46875, "loss": 2.587062358856201 }, { "id": "identity_145363", "image": "FigureQA/images/81730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Light Salmon have a value above 75 on the y-axis label?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.390625, "loss": 3.126396417617798 }, { "id": "identity_320372", "image": "PMC-VQA/images/PMC7718624_j_tnsci-2020-0145_fig_003_315715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in this image?\nChoices:\n(A) Calcification of some bone tissue\n(B) Calcification of an epidural space\n(C) Calcification of some brain tissue\n(D) Calcification of a dura mater" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.75, "loss": 4.5577497482299805 }, { "id": "identity_82825", "image": "GeoQA+/images/689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of triangle ABC is 24, what is the area of triangle AEF?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.875, "loss": 2.277296781539917 }, { "id": "identity_114252", "image": "A-OKVQA/images/459912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is advertised by the red sign on the right side of the image?" }, { "from": "gpt", "value": "The answer is Restaurant" } ], "clip_score": 15.0703125, "loss": 2.7978320121765137 }, { "id": "identity_224286", "image": "FigureQA/images/54633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Peru and Medium Aqua, and how does it compare to the combined percentage of Green Yellow and Medium Aqua?" }, { "from": "gpt", "value": "The answer is They are equal" } ], "clip_score": 38.75, "loss": 4.605007171630859 }, { "id": "identity_288519", "image": "UniGeo/images/1138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, with circle O and diameter CD perpendicular to chord EF, with A as the foot of the perpendicular, if ∠OEA = 40°, what is the measure of ∠DCF?\nChoices:\n(A) 100°\n(B) 50°\n(C) 40°\n(D) 25°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.71875, "loss": 2.3306355476379395 }, { "id": "identity_166703", "image": "IconQA/images/35161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What transformation has been applied to the letter 'R' in the image?" }, { "from": "gpt", "value": "The answer is Translation to the right" } ], "clip_score": 26.765625, "loss": 3.715663433074951 }, { "id": "identity_84035", "image": "UniGeo/images/2043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOB?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 33.0625, "loss": 1.8759057521820068 }, { "id": "identity_200738", "image": "PMC-VQA/images/PMC4821972_fig4_18602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any sign of leakage in the area surrounding the lesion?" }, { "from": "gpt", "value": "The answer is No sign of leakage" } ], "clip_score": 23.84375, "loss": 3.09317684173584 }, { "id": "identity_63443", "image": "TQA/images/abc_question_images/cycle_nitrogen_16710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter marks the process of water runoff towards a water body?" }, { "from": "gpt", "value": "The answer is N" } ], "clip_score": 29.84375, "loss": 2.733675241470337 }, { "id": "identity_326390", "image": "PMC-VQA/images/PMC5758251_F5_260883.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens to something in the flexed position according to the caption?\nChoices:\n(A) Compression by the posterior wall of the L4 VB \n(B) Expansion due to decreased spinal pressure \n(C) No change in position or size \n(D) Compression of the anterior wall of the L4 VB" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.28125, "loss": 3.5655550956726074 }, { "id": "identity_191417", "image": "PMC-VQA/images/PMC6387555_Fig2_440103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the small circular structure at the center bottom of the image?" }, { "from": "gpt", "value": "The answer is Spinal column" } ], "clip_score": 28.03125, "loss": 3.747108221054077 }, { "id": "identity_150103", "image": "DocVQA/images/nkcd0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address to return the form to?" }, { "from": "gpt", "value": "The answer is 535 North Dearborn Street, Chicago, Illinois 60610" } ], "clip_score": 22.625, "loss": 1.1851786375045776 }, { "id": "identity_232786", "image": "TQA/images/question_images/waves_7680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the vertical distance between the crest and the trough of the wave called, and how is it calculated if the amplitude (a) is known?" }, { "from": "gpt", "value": "The answer is Wave height" } ], "clip_score": 35.03125, "loss": 3.3958704471588135 }, { "id": "identity_96657", "image": "TabMWP/images/7326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does it cost to take a car than a bicycle on the Southport-Fort Fisher ferry?" }, { "from": "gpt", "value": "The answer is $3" } ], "clip_score": 36.5, "loss": 2.265472650527954 }, { "id": "identity_294674", "image": "FigureQA/images/46577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater in shade, Dark Red or Light Slate?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.3125, "loss": 3.940225124359131 }, { "id": "identity_261986", "image": "IconQA/images/98055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show when Bella is going to school this morning?\nChoices:\n(A) 8:00 A.M.\n(B) 8:00 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.734375, "loss": 3.8359432220458984 }, { "id": "identity_220805", "image": "VQA2.0/images/456433.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of sports equipment is the person using?" }, { "from": "gpt", "value": "The answer is Baseball glove" } ], "clip_score": 24.859375, "loss": 2.8785290718078613 }, { "id": "identity_32298", "image": "PMC-VQA/images/PMC4920187_BIO017384F2_44740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrowhead point to in images I-K? \nChoices:\n(A) thio-T-positive SC35-negative particle \n(B) a thio-T and SC35-positive particle \n(C) nucleus \n(D) nucleolus " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.515625, "loss": 4.993019104003906 }, { "id": "identity_92507", "image": "TabMWP/images/10369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average time Alec spent on homework over the five days?" }, { "from": "gpt", "value": "The answer is 39.6 minutes" } ], "clip_score": 36.65625, "loss": 1.6581971645355225 }, { "id": "identity_238104", "image": "VizWiz/images/VizWiz_train_00016425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many icons have notification badges, and what is the total number of notifications shown?" }, { "from": "gpt", "value": "The answer is 3 icons, 4 notifications" } ], "clip_score": 25.859375, "loss": 2.4958715438842773 }, { "id": "identity_35429", "image": "TextVQA/images/00548dfc8ec76f5d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is wrote on the first building on the right above the picture?" }, { "from": "gpt", "value": "The answer is bershkla" } ], "clip_score": 15.8203125, "loss": 3.2373433113098145 }, { "id": "identity_186058", "image": "TextVQA/images/00d3bc72520af34d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the cap worn by the catcher?" }, { "from": "gpt", "value": "The answer is New Era" } ], "clip_score": 23.859375, "loss": 2.6961557865142822 }, { "id": "identity_167957", "image": "IconQA/images/89067.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total time difference between when Britney and Lee ate dinner?" }, { "from": "gpt", "value": "The answer is 2 hours" } ], "clip_score": 27.828125, "loss": 2.005619525909424 }, { "id": "identity_210550", "image": "PMC-VQA/images/PMC8145651_jof-07-00346-f001_448978.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of pneumothorax?" }, { "from": "gpt", "value": "The answer is No pneumothorax" } ], "clip_score": 28.859375, "loss": 2.771138906478882 }, { "id": "identity_247203", "image": "CLEVR-Math/images/CLEVR_train_059099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are left if you subtract all balls and one red cube?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.5625, "loss": 2.256878614425659 }, { "id": "identity_36141", "image": "IconQA/images/58111.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Move the ruler to measure the length of the nail to the nearest inch. The nail is about (_) inches long." }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.65625, "loss": 2.137126922607422 }, { "id": "identity_113407", "image": "TextVQA/images/92ee9779da5e2b9e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the mixers displayed in the store?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 25.234375, "loss": 3.5450198650360107 }, { "id": "identity_256109", "image": "PMC-VQA/images/PMC5910015_FIG1_304740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely diagnosis based on the location of the finding in the left temporal lobe on the CT scan?" }, { "from": "gpt", "value": "The answer is Tumor" } ], "clip_score": 33.71875, "loss": 2.8679211139678955 }, { "id": "identity_25767", "image": "A-OKVQA/images/234717.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where would lighting be most likely to hit in this area?\nChoices:\n(A) water\n(B) vehicle\n(C) rocks\n(D) lightning rod" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.40625, "loss": 4.355050563812256 }, { "id": "identity_31118", "image": "DocVQA/images/mgyk0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Budget Period mentioned in the document?" }, { "from": "gpt", "value": "The answer is 1 August 1976: 31 March 1977" } ], "clip_score": 32.3125, "loss": 1.3851326704025269 }, { "id": "identity_84499", "image": "UniGeo/images/1015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠AOB = 76°, what is the measure of ∠ABC?" }, { "from": "gpt", "value": "The answer is 38°" } ], "clip_score": 33.78125, "loss": 2.085681676864624 }, { "id": "identity_124107", "image": "A-OKVQA/images/572362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the umbrella being used?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 24.5, "loss": 2.594750165939331 }, { "id": "identity_25528", "image": "IconQA/images/43466.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past two." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 25.84375, "loss": 5.1549835205078125 }, { "id": "identity_53886", "image": "FigureQA/images/67815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Brown line ever surpass the Tomato line in value?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.0, "loss": 3.2142510414123535 }, { "id": "identity_165590", "image": "GeoQA+/images/4962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle B?" }, { "from": "gpt", "value": "The answer is 50 degrees" } ], "clip_score": 31.765625, "loss": 2.283700704574585 }, { "id": "identity_224534", "image": "AI2D/images/67.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the sequential processes that lead from fossil fuels to atmospheric carbon in the carbon cycle diagram?" }, { "from": "gpt", "value": "The answer is combustion, atmospheric carbon" } ], "clip_score": 31.015625, "loss": 2.3364076614379883 }, { "id": "identity_309897", "image": "TabMWP/images/28907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the records of an orchard, on which day was the highest number of apples picked?\nChoices:\n(A) Friday\n(B) Saturday\n(C) Sunday\n(D) Monday" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.5, "loss": 3.6812186241149902 }, { "id": "identity_166405", "image": "TabMWP/images/3853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a recipe calls for 3/4 kilogram of pepper, how much would that cost?" }, { "from": "gpt", "value": "The answer is $1.50" } ], "clip_score": 28.65625, "loss": 1.5468318462371826 }, { "id": "identity_265623", "image": "VQA-RAD/images/synpic57935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it possible to observe the lateral ventricles?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.796875, "loss": 3.523388385772705 }, { "id": "identity_87222", "image": "UniGeo/images/3257.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if DE is parallel to BC and AD:DB = 3:2, what is the ratio of the perimeter of triangle ADE to the perimeter of triangle ABC?" }, { "from": "gpt", "value": "The answer is 3:5" } ], "clip_score": 35.5, "loss": 1.443071961402893 }, { "id": "identity_109778", "image": "TextVQA/images/1003153971ee6833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What country is represented on the coin?" }, { "from": "gpt", "value": "The answer is Sverige (Sweden)" } ], "clip_score": 29.640625, "loss": 2.087881326675415 }, { "id": "identity_144753", "image": "DocVQA/images/ysbf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the budget for Services and Supplies in the year 1968-69?" }, { "from": "gpt", "value": "The answer is $1,200" } ], "clip_score": 28.09375, "loss": 1.863407850265503 }, { "id": "identity_241049", "image": "UniGeo/images/97.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知直线AB∥CD,∠BEG的平分线EF交CD于点F,若∠1=42°,求∠BEG的大小。" }, { "from": "gpt", "value": "The answer is 84°" } ], "clip_score": 31.359375, "loss": 2.0321767330169678 }, { "id": "identity_229364", "image": "Geometry3K/images/375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of \\( x \\) and the measure of \\(\\angle JLM\\)?" }, { "from": "gpt", "value": "The answer is \\( x = 4 \\) and \\(\\angle JLM = 32^\\circ\\)" } ], "clip_score": 33.53125, "loss": 1.6204360723495483 }, { "id": "identity_161647", "image": "Geometry3K/images/1686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 120" } ], "clip_score": 30.546875, "loss": 2.1747426986694336 }, { "id": "identity_67067", "image": "AI2D/images/1033.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What season is it in the Northern Hemisphere during the autumnal equinox?" }, { "from": "gpt", "value": "The answer is Fall" } ], "clip_score": 31.875, "loss": 2.9854023456573486 }, { "id": "identity_104907", "image": "VQA-AS/images/19149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are hanging on the wall?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.953125, "loss": 4.990041255950928 }, { "id": "identity_327541", "image": "A-OKVQA/images/231631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of pattern is this surface?\nChoices:\n(A) square\n(B) flat\n(C) bumpy\n(D) black" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 7.09765625, "loss": 4.205714225769043 }, { "id": "identity_113387", "image": "Super-CLEVR/images/superCLEVR_new_001160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object that is partially obscured by another object?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 16.90625, "loss": 3.596611738204956 }, { "id": "identity_144810", "image": "PlotQA/images/137715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of disbursements received by Ghana from the Regional Development Bank over the years shown?" }, { "from": "gpt", "value": "The answer is 9000000" } ], "clip_score": 36.5625, "loss": 1.8450469970703125 }, { "id": "identity_90155", "image": "GeoQA+/images/4713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle AEB?" }, { "from": "gpt", "value": "The answer is 25√2 square units" } ], "clip_score": 28.265625, "loss": 2.611013412475586 }, { "id": "identity_108446", "image": "VQA2.0/images/376436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction is the arrow on the dog run sign pointing?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 30.671875, "loss": 3.1792407035827637 }, { "id": "identity_16440", "image": "TQA/images/abc_question_images/parts_microscope_17198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter indicates the lenses that adjust viewing?\nChoices:\n(A) J\n(B) M\n(C) A\n(D) K" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.03125, "loss": 4.038559436798096 }, { "id": "identity_206848", "image": "VQA2.0/images/173196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is being led by the individuals?" }, { "from": "gpt", "value": "The answer is A goat" } ], "clip_score": 18.9375, "loss": 3.4321835041046143 }, { "id": "identity_192669", "image": "VizWiz/images/VizWiz_train_00010756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number for Store Support?" }, { "from": "gpt", "value": "The answer is 855.222.0809" } ], "clip_score": 20.765625, "loss": 1.9925353527069092 }, { "id": "identity_6632", "image": "Geometry3K/images/1924.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the regular polygon figure. Round to the nearest tenth.\nChoices:\n(A) 27.7\n(B) 55.4\n(C) 166.3\n(D) 332.6" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.359375, "loss": 2.707078218460083 }, { "id": "identity_229303", "image": "TQA/images/question_images/human_system_eye_6144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the parts of the eye labeled 1, 5, 9, and 14, and describe their functions" }, { "from": "gpt", "value": "The answer is Cornea, Lens, Sclera, Optic Nerve" } ], "clip_score": 33.03125, "loss": 2.1808981895446777 }, { "id": "identity_289491", "image": "ScienceQA/images/5688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following is a bird?\nChoices:\n(A) mandarinfish\n(B) American bullfrog\n(C) emerald hummingbird\n(D) California toad" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.578125, "loss": 3.7287395000457764 }, { "id": "identity_254754", "image": "ScienceQA/images/8754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image, then determine the distance between this capital and the capital of the neighboring state to the west" }, { "from": "gpt", "value": "The answer is 95 miles" } ], "clip_score": 28.203125, "loss": 3.343082904815674 }, { "id": "identity_250639", "image": "PMC-VQA/images/PMC5749752_pone.0190287.g002_259014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences between the imaging techniques used in the top and bottom images, and how do these differences affect the diagnostic information provided?" }, { "from": "gpt", "value": "The answer is T2w images show water content; late contrast-enhanced images show vascularity" } ], "clip_score": 28.75, "loss": 3.4101641178131104 }, { "id": "identity_222004", "image": "A-OKVQA/images/442040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the majority of the skiers' jackets?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 23.78125, "loss": 3.8710741996765137 }, { "id": "identity_13804", "image": "UniGeo/images/1882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的弦,半径OC⊥AB于点D,且AB=8,OC=5,则DC的长为()\nChoices:\n(A) 2\n(B) 5\n(C) 3\n(D) 1" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.3125, "loss": 2.4586775302886963 }, { "id": "identity_168288", "image": "TabMWP/images/4259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average frequency of the numbers spun?" }, { "from": "gpt", "value": "The answer is 11.33" } ], "clip_score": 31.78125, "loss": 1.7500207424163818 }, { "id": "identity_158242", "image": "ScienceQA/images/19409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly west of Texas?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 27.984375, "loss": 2.3487112522125244 }, { "id": "identity_2635", "image": "CLEVR-Math/images/CLEVR_train_051384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all blue cylinders. How many cylinders are left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.390625, "loss": 2.4973461627960205 }, { "id": "identity_123365", "image": "VQA2.0/images/449959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the baby's clothing?" }, { "from": "gpt", "value": "The answer is Space-themed with stars and planets" } ], "clip_score": 18.640625, "loss": 3.065706729888916 }, { "id": "identity_187001", "image": "VQA-AS/images/16846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage appears to be in the bottle on the table?" }, { "from": "gpt", "value": "The answer is Wine" } ], "clip_score": 23.5, "loss": 2.656419277191162 }, { "id": "identity_319322", "image": "PlotQA/images/142370.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of solid fuel emission in this country?" }, { "from": "gpt", "value": "The answer is 27.4541101356744" } ], "clip_score": 36.65625, "loss": 2.7893054485321045 }, { "id": "identity_157539", "image": "ScienceQA/images/1901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of teeth are prominent in the Ursus spelaeus's skull?" }, { "from": "gpt", "value": "The answer is Canines" } ], "clip_score": 28.875, "loss": 3.229548692703247 }, { "id": "identity_7686", "image": "VizWiz/images/VizWiz_train_00013954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is kitchen" } ], "clip_score": 23.09375, "loss": 3.4446258544921875 }, { "id": "identity_280234", "image": "UniGeo/images/2575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the rhombus ABCD shown in the diagram, where AC and BD are diagonals with lengths AC=6 and BD=8, what is the area of the shaded region?\nChoices:\n(A) 48\n(B) 10\n(C) 12\n(D) 24" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.46875, "loss": 2.6865646839141846 }, { "id": "identity_55063", "image": "FigureQA/images/61592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by the Medium Blue line?" }, { "from": "gpt", "value": "The answer is 75" } ], "clip_score": 21.203125, "loss": 2.420910596847534 }, { "id": "identity_35623", "image": "AI2D/images/3705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What object is represented in the above diagram?\nChoices:\n(A) kidney\n(B) brain\n(C) urinary bladder\n(D) heart" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.09375, "loss": 4.38438606262207 }, { "id": "identity_43060", "image": "FigureQA/images/34604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the larger portion of the pie chart?" }, { "from": "gpt", "value": "The answer is Forest Green" } ], "clip_score": 39.0, "loss": 2.6493380069732666 }, { "id": "identity_62819", "image": "AI2D/images/2357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are there in the cycle shown in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.65625, "loss": 2.25888729095459 }, { "id": "identity_271779", "image": "VQA2.0/images/231315.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What action is the man performing towards the cop?" }, { "from": "gpt", "value": "The answer is taking picture of cop" } ], "clip_score": 29.578125, "loss": 3.3122823238372803 }, { "id": "identity_303934", "image": "VizWiz/images/VizWiz_train_00005558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these some kind of fruit?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.125, "loss": 3.489241123199463 }, { "id": "identity_168046", "image": "TabMWP/images/10267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of respondents surveyed?" }, { "from": "gpt", "value": "The answer is 2090" } ], "clip_score": 14.5703125, "loss": 2.489025354385376 }, { "id": "identity_196889", "image": "PMC-VQA/images/PMC6927053_fig1_89192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the largest abnormality in the image?" }, { "from": "gpt", "value": "The answer is Approximately a few centimeters" } ], "clip_score": 21.796875, "loss": 2.5283424854278564 }, { "id": "identity_195720", "image": "PMC-VQA/images/PMC7602441_materials-13-04567-f004_288281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate thickness of layer 2 in the image?" }, { "from": "gpt", "value": "The answer is 5 micrometers" } ], "clip_score": 25.640625, "loss": 1.947007656097412 }, { "id": "identity_1200", "image": "DocVQA/images/rtjx0225_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is some preliminary conclusions" } ], "clip_score": 25.796875, "loss": 2.1409800052642822 }, { "id": "identity_191320", "image": "PMC-VQA/images/PMC4304244_F8_352529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many images are displayed in the sequence?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.75, "loss": 4.168678283691406 }, { "id": "identity_190228", "image": "A-OKVQA/images/376677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the sign for \"Car Accessories\" in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 9.4765625, "loss": 3.561669111251831 }, { "id": "identity_11829", "image": "TextVQA/images/eb2f1203508a74fb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does this bottlecap say?" }, { "from": "gpt", "value": "The answer is team millenium olympic partner" } ], "clip_score": 38.625, "loss": 2.7357029914855957 }, { "id": "identity_240041", "image": "TQA/images/question_images/human_system_reproductory_7044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the pathway of sperm from its production to its exit from the body, naming each structure it passes through in order" }, { "from": "gpt", "value": "The answer is Testis, Epididymis, Vas deferens, Seminal vesicle, Prostate gland, Urethra" } ], "clip_score": 30.6875, "loss": 0.7764689922332764 }, { "id": "identity_171268", "image": "IconQA/images/86300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper clips are partially visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.484375, "loss": 2.5250627994537354 }, { "id": "identity_230897", "image": "UniGeo/images/3099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,将△AOB绕点O按逆时针方向旋转45°后得到△A′OB′,若∠AOB=15°,则∠A′OB′的度数是多少?并求出∠A′OB′与∠AOB的差值。" }, { "from": "gpt", "value": "The answer is 60°, 45°" } ], "clip_score": 28.921875, "loss": 2.0254969596862793 }, { "id": "identity_143582", "image": "PlotQA/images/124494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the graph?" }, { "from": "gpt", "value": "The answer is Total rural population of a country" } ], "clip_score": 32.75, "loss": 2.5608246326446533 }, { "id": "identity_215905", "image": "A-OKVQA/images/550601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image appear to be taken?" }, { "from": "gpt", "value": "The answer is Midday or afternoon" } ], "clip_score": 10.7109375, "loss": 3.710935354232788 }, { "id": "identity_113156", "image": "A-OKVQA/images/485788.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing items are displayed in the window?" }, { "from": "gpt", "value": "The answer is Swimwear" } ], "clip_score": 16.59375, "loss": 2.5511937141418457 }, { "id": "identity_325875", "image": "VQA2.0/images/143507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the bus?" }, { "from": "gpt", "value": "The answer is 75" } ], "clip_score": 25.8125, "loss": 2.2571845054626465 }, { "id": "identity_311574", "image": "FigureQA/images/87469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.8359375, "loss": 4.122808456420898 }, { "id": "identity_264037", "image": "UniGeo/images/424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, CD is a chord of circle O, and ∠ADC is 35°. What is the degree measure of ∠CBA?\nChoices:\n(A) 35°\n(B) 45°\n(C) 55°\n(D) 65°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.0, "loss": 2.483808994293213 }, { "id": "identity_327619", "image": "TabMWP/images/13157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: An Idaho farmer has been monitoring crop prices over time. In 2006, did this seed or apples cost more per cwt?\nChoices:\n(A) apples\n(B) this seed" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 40.03125, "loss": 3.7765097618103027 }, { "id": "identity_186106", "image": "VQA2.0/images/475076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image appear to be taken?" }, { "from": "gpt", "value": "The answer is Morning or late afternoon" } ], "clip_score": 13.875, "loss": 3.5872302055358887 }, { "id": "identity_176392", "image": "IconQA/images/86936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unit squares are there along the perimeter of the shape?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 25.78125, "loss": 2.127869129180908 }, { "id": "identity_28966", "image": "FigureQA/images/15587.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Indigo the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.6875, "loss": 3.992034435272217 }, { "id": "identity_212867", "image": "A-OKVQA/images/557174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a musical instrument in the room?" }, { "from": "gpt", "value": "The answer is Yes, a guitar" } ], "clip_score": 19.0625, "loss": 3.636364459991455 }, { "id": "identity_49425", "image": "PlotQA/images/64071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total net bilateral aid flow from DAC donors to South Asia in 2007?" }, { "from": "gpt", "value": "The answer is 10000000" } ], "clip_score": 24.46875, "loss": 1.6354267597198486 }, { "id": "identity_3353", "image": "GeoQA+/images/5261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,两直线a,b相交于点O.若∠1=60°,则∠2=()\nChoices:\n(A) 50°\n(B) 60°\n(C) 110°\n(D) 120°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.34375, "loss": 2.688154935836792 }, { "id": "identity_330496", "image": "PMC-VQA/images/PMC6542118_Fig2_487434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the eye is shown in this image?\nChoices:\n(A) Anterior segment of the iris \n(B) Posterior segment of the iris \n(C) Anterior segment of the ciliary process \n(D) Posterior segment of the ciliary process" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.359375, "loss": 3.792046308517456 }, { "id": "identity_60674", "image": "DocVQA/images/fjyg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the draft mentioned in the memorandum?" }, { "from": "gpt", "value": "The answer is Cereal Enrichment in Perspective, 1958!" } ], "clip_score": 25.578125, "loss": 3.352444887161255 }, { "id": "identity_40711", "image": "DVQA/images/bar_train_00122481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy for the 'blonde' dataset?" }, { "from": "gpt", "value": "The answer is north" } ], "clip_score": 30.671875, "loss": 3.9716830253601074 }, { "id": "identity_97565", "image": "IconQA/images/62712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many leaves are partially hidden by the dotted line?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.625, "loss": 2.4564030170440674 }, { "id": "identity_260438", "image": "UniGeo/images/1548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AD∥BE∥CF,直线a、b与这三条平行线分别交于点A、B、C和点D、E、F,AB=4,BC=6,DE=3,若三角形ABC的面积为24,则三角形DEF的面积是多少?" }, { "from": "gpt", "value": "The answer is 13.5" } ], "clip_score": 28.75, "loss": 1.8936429023742676 }, { "id": "identity_24223", "image": "DocVQA/images/rxvh0227_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the budget requested and recommended for FY 1976-1977 ?" }, { "from": "gpt", "value": "The answer is $476,468" } ], "clip_score": 20.015625, "loss": 2.289931058883667 }, { "id": "identity_71830", "image": "AI2D/images/782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the end product of the cow's digestion process as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Egestion" } ], "clip_score": 30.59375, "loss": 3.03769850730896 }, { "id": "identity_189817", "image": "A-OKVQA/images/574215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cheese is visible on the pizza?" }, { "from": "gpt", "value": "The answer is Mozzarella cheese" } ], "clip_score": 28.125, "loss": 1.7911968231201172 }, { "id": "identity_78851", "image": "Geometry3K/images/1142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle adjacent to angle (2x) degrees?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 31.90625, "loss": 1.9121553897857666 }, { "id": "identity_39897", "image": "FigureQA/images/72794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Orchid have a lower minimum value than Coral?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.734375, "loss": 3.211388349533081 }, { "id": "identity_228864", "image": "AI2D/images/3936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two structures that connect the mesosoma to the gaster, and what is their sequence from the mesosoma to the gaster?" }, { "from": "gpt", "value": "The answer is Petiole, Post-petiole" } ], "clip_score": 27.203125, "loss": 2.0994744300842285 }, { "id": "identity_138871", "image": "DocVQA/images/mxfd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many signatures are present on the document?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 19.21875, "loss": 2.189263105392456 }, { "id": "identity_254118", "image": "GeoQA+/images/5047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,AB的垂直平分线交AB于点D,交BC于点E,连接AE.若AB=6,△ACE的周长为13,且AE的长度为x,求△ABC的面积。" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 28.03125, "loss": 2.9131431579589844 }, { "id": "identity_4853", "image": "VQA2.0/images/305959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do you see any small animals?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 8.765625, "loss": 3.3165574073791504 }, { "id": "identity_24796", "image": "AI2D/images/3792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The petiole is labeled with which letter?\nChoices:\n(A) M\n(B) L\n(C) I\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.046875, "loss": 4.53042459487915 }, { "id": "identity_131765", "image": "VQA2.0/images/98748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many burners are on the stove?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.890625, "loss": 2.2403857707977295 }, { "id": "identity_8706", "image": "TQA/images/question_images/cycle_water_4978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which process is responsible for the rain?\nChoices:\n(A) Transpiration\n(B) Precipitation\n(C) Evaporation\n(D) Collection" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.6875, "loss": 4.503045558929443 }, { "id": "identity_97837", "image": "CLEVR-Math/images/CLEVR_train_058442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue objects are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.984375, "loss": 2.4158830642700195 }, { "id": "identity_242096", "image": "TQA/images/question_images/earth_day_night_2639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the Earth is in the northern hemisphere's summer, where would the vertical ray hit, and what is the significance of this location in terms of daylight duration at the Arctic Circle?" }, { "from": "gpt", "value": "The answer is Tropic of Cancer, 24 hours of daylight at Arctic Circle" } ], "clip_score": 33.71875, "loss": 1.9683209657669067 }, { "id": "identity_151663", "image": "ScienceQA/images/3590.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which image shows a substance that is naturally occurring?" }, { "from": "gpt", "value": "The answer is Ocean water" } ], "clip_score": 30.515625, "loss": 2.268665313720703 }, { "id": "identity_148667", "image": "DocVQA/images/kjgf0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the editor who sent the article on \"Appetite Regulation in the Young\" to the Journal of Diseases of Children?" }, { "from": "gpt", "value": "The answer is J. R. Brobeck" } ], "clip_score": 22.59375, "loss": 3.1226630210876465 }, { "id": "identity_195745", "image": "A-OKVQA/images/525971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car closest to the camera?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 16.546875, "loss": 3.0455310344696045 }, { "id": "identity_35399", "image": "A-OKVQA/images/337780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What mode of transport here is the oldest?\nChoices:\n(A) motorcycle\n(B) taxi\n(C) van\n(D) elephant" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.578125, "loss": 4.069305419921875 }, { "id": "identity_331408", "image": "TextVQA/images/6d7073db34e3af18.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is mentioned above these hands?" }, { "from": "gpt", "value": "The answer is omens" } ], "clip_score": 17.234375, "loss": 2.9145660400390625 }, { "id": "identity_17272", "image": "A-OKVQA/images/561919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many colors of leaf are in the hedge in the middle of the station?\nChoices:\n(A) four\n(B) one\n(C) two\n(D) three" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.6875, "loss": 3.3388850688934326 }, { "id": "identity_324302", "image": "IconQA/images/25869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are there?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 20.625, "loss": 1.4913311004638672 }, { "id": "identity_182030", "image": "VQA2.0/images/311208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the horse closest to the bottom edge of the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 24.609375, "loss": 3.8599343299865723 }, { "id": "identity_147896", "image": "MapQA/images/map_10184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest percentage range?" }, { "from": "gpt", "value": "The answer is the name of the state or states with this color" } ], "clip_score": 30.296875, "loss": 3.2024319171905518 }, { "id": "identity_200821", "image": "A-OKVQA/images/173553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two main colors of the player's shoes?" }, { "from": "gpt", "value": "The answer is Yellow and White" } ], "clip_score": 16.1875, "loss": 2.1269993782043457 }, { "id": "identity_154940", "image": "ScienceQA/images/15704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state highlighted in dark green?" }, { "from": "gpt", "value": "The answer is Montpelier" } ], "clip_score": 29.6875, "loss": 2.6466124057769775 }, { "id": "identity_203827", "image": "VQA2.0/images/105708.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are watching the game from the stands?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 20.734375, "loss": 2.3686540126800537 }, { "id": "identity_59681", "image": "DocVQA/images/pqmw0228_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many prints of the Cardinal on Sunflower does the sender want to obtain?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 17.625, "loss": 3.3788981437683105 }, { "id": "identity_50729", "image": "FigureQA/images/83579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Tomato have a higher peak value than Crimson?" }, { "from": "gpt", "value": "The answer is No, Tomato does not have a higher peak value than Crimson" } ], "clip_score": 28.71875, "loss": 1.1576334238052368 }, { "id": "identity_94235", "image": "TabMWP/images/3964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern in the increase of the number of highlighters as the number of desk drawers increases?" }, { "from": "gpt", "value": "The answer is Increase of 5 highlighters per additional desk drawer" } ], "clip_score": 33.75, "loss": 1.8442506790161133 }, { "id": "identity_314304", "image": "Super-CLEVR/images/superCLEVR_new_000280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything else made of the same material as this?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 16.09375, "loss": 4.6666975021362305 }, { "id": "identity_268531", "image": "UniGeo/images/2159.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, there is a pond in a school campus. To determine the distance between two trees A and B by the pond, Xiao Liang collected the following data: ∠A = ∠CDE, AD = DC, DE = 10m. What is the distance between trees A and B?\nChoices:\n(A) 10m\n(B) 15m\n(C) 20m\n(D) 25m" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.59375, "loss": 2.6551716327667236 }, { "id": "identity_288574", "image": "PMC-VQA/images/PMC7500744_FIG1_256789.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is shown in the CT image? \nChoices:\n(A) Pulmonary embolism \n(B) Lung nodule \n(C) Pulmonary artery AVMs \n(D) Bronchitis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.65625, "loss": 4.027111530303955 }, { "id": "identity_172006", "image": "TabMWP/images/27563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pieces of fruit picked from all four trees?" }, { "from": "gpt", "value": "The answer is 295" } ], "clip_score": 33.53125, "loss": 2.198996067047119 }, { "id": "identity_54585", "image": "FigureQA/images/98278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Periwinkle line ever surpass the value of 40 on the y-axis label?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.546875, "loss": 2.9210708141326904 }, { "id": "identity_309700", "image": "TextVQA/images/1481db31b19b9381.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the athlete from this country?" }, { "from": "gpt", "value": "The answer is han tianyu" } ], "clip_score": 25.21875, "loss": 2.697467803955078 }, { "id": "identity_142425", "image": "MapQA/images/map_7913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color representing the value range for the state of Illinois?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 30.421875, "loss": 3.0890989303588867 }, { "id": "identity_178273", "image": "IconQA/images/19071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of internal angles in all the shapes combined?" }, { "from": "gpt", "value": "The answer is 2880 degrees" } ], "clip_score": 26.890625, "loss": 1.789790391921997 }, { "id": "identity_336518", "image": "A-OKVQA/images/184531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely number of wheels attached to the motorbike in the forefront of this scene?\nChoices:\n(A) one\n(B) two\n(C) four\n(D) three" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 15.78125, "loss": 3.428414821624756 }, { "id": "identity_239241", "image": "TQA/images/question_images/seasons_1448.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the Autumnal Equinox happens in the northern hemisphere during September, what happens in the southern hemisphere during the March equinox?" }, { "from": "gpt", "value": "The answer is Autumnal Equinox" } ], "clip_score": 38.03125, "loss": 2.7311978340148926 }, { "id": "identity_273862", "image": "Geometry3K/images/468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of the variable y shown in the figure.\nChoices:\n(A) 55\n(B) 115\n(C) 125\n(D) 135" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.78125, "loss": 3.538336992263794 }, { "id": "identity_271066", "image": "TabMWP/images/9311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Logan requires to purchase 8 pieces of elegant stationery?" }, { "from": "gpt", "value": "The answer is 6.96" } ], "clip_score": 26.953125, "loss": 2.028877019882202 }, { "id": "identity_113347", "image": "TextVQA/images/a831d550a637bb73.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the street intersecting with Broadway according to the street sign?" }, { "from": "gpt", "value": "The answer is West 43rd Street" } ], "clip_score": 24.375, "loss": 1.9885129928588867 }, { "id": "identity_168645", "image": "IconQA/images/71096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add two dots to the last row, how many dots will be in the last row?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 32.90625, "loss": 1.9548320770263672 }, { "id": "identity_248724", "image": "FigureQA/images/3516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the maximum and minimum values among the given colors?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 22.4375, "loss": 2.3824048042297363 }, { "id": "identity_159403", "image": "GeoQA+/images/5226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 30.90625, "loss": 1.8908939361572266 }, { "id": "identity_149227", "image": "MapQA/images/map_4263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with a value less than 25%?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.59375, "loss": 3.0147411823272705 }, { "id": "identity_261069", "image": "TextVQA/images/005c9476aa2077c2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary theme of the videos displayed on the channel shown?" }, { "from": "gpt", "value": "The answer is 3D content and stereoscopic visuals" } ], "clip_score": 25.84375, "loss": 2.2024130821228027 }, { "id": "identity_72283", "image": "AI2D/images/2198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During which cycle do aphids produce live births?" }, { "from": "gpt", "value": "The answer is Summer Cycle" } ], "clip_score": 34.28125, "loss": 2.133598804473877 }, { "id": "identity_282975", "image": "PMC-VQA/images/PMC4936678_pone.0158747.g001_48838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter D represent in the given choices? \n(A) A lymphatic sinus \n(B) The capsule \n(C) The dome region \n(D) The interfollicular region" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.703125, "loss": 4.435408592224121 }, { "id": "identity_241056", "image": "FigureQA/images/90328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much does Royal Blue exceed Steel Blue at x = 50?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.359375, "loss": 2.5943262577056885 }, { "id": "identity_313704", "image": "Geometry3K/images/1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use this shape to find y.\nChoices:\n(A) 6.45\n(B) 12.9\n(C) 17.9\n(D) 35.8" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.6875, "loss": 3.032254934310913 }, { "id": "identity_30213", "image": "VQA2.0/images/135920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the lamp in the room?" }, { "from": "gpt", "value": "The answer is no lamp" } ], "clip_score": 24.0, "loss": 3.9573333263397217 }, { "id": "identity_23098", "image": "TabMWP/images/22121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Look at the following schedule. Barbara got on the bus at the art museum at 10.00 A.M. What time will she get to the university campus?\nChoices:\n(A) 11:00 A.M.\n(B) 9:15 A.M.\n(C) 10:00 A.M.\n(D) 12:00 P.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.5625, "loss": 3.130018949508667 }, { "id": "identity_177148", "image": "TabMWP/images/10053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students scored less than 20 points?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.015625, "loss": 2.5871665477752686 }, { "id": "identity_254393", "image": "TabMWP/images/19786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of members in all the clubs that have fewer than 65 members?" }, { "from": "gpt", "value": "The answer is 562" } ], "clip_score": 36.96875, "loss": 2.298584461212158 }, { "id": "identity_266506", "image": "VQA2.0/images/371791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of trees are in sight?" }, { "from": "gpt", "value": "The answer is barren" } ], "clip_score": 14.6953125, "loss": 3.746060371398926 }, { "id": "identity_82312", "image": "UniGeo/images/1730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ∠1 is 37°, what is the measure of ∠BAC?" }, { "from": "gpt", "value": "The answer is 74°" } ], "clip_score": 34.0, "loss": 2.147238254547119 }, { "id": "identity_92978", "image": "TabMWP/images/10882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many photographs were taken in total over the past 5 days?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 32.09375, "loss": 1.9682419300079346 }, { "id": "identity_262662", "image": "Geometry3K/images/1443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the segment is tangent, what is the value of x?\nChoices:\n(A) \\sqrt { 14 }\n(B) \\sqrt { 17 }\n(C) \\sqrt { 93 }\n(D) \\sqrt { 485 }" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.75, "loss": 3.5004372596740723 }, { "id": "identity_160483", "image": "Geometry3K/images/1035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle opposite to the angle measuring (2x + 41)°?" }, { "from": "gpt", "value": "The answer is 115°" } ], "clip_score": 28.609375, "loss": 1.4246519804000854 }, { "id": "identity_114188", "image": "PMC-VQA/images/PMC6115612_F8_361014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text on the left side of the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.609375, "loss": 3.261986494064331 }, { "id": "identity_2601", "image": "Geometry3K/images/31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x.\nChoices:\n(A) 30\n(B) 45\n(C) 60\n(D) 75" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.109375, "loss": 2.562272548675537 }, { "id": "identity_296851", "image": "PMC-VQA/images/PMC7808904_fig0020_343220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the condition depicted in the image above. \nOptions:\n(A) diverticulitis\n(B) herniation\n(C) mesenteric outpouching\n(D) propria muscularis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.625, "loss": 4.268076419830322 }, { "id": "identity_216835", "image": "A-OKVQA/images/259467.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color on the floor mat where the children are seated?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.390625, "loss": 3.4562439918518066 }, { "id": "identity_211701", "image": "A-OKVQA/images/142182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the teddy bears on the shelves?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 34.15625, "loss": 3.4699044227600098 }, { "id": "identity_86073", "image": "GeoQA+/images/677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the measure of angle A in parallelogram ABCD is 50 degrees, what is the measure of angle B?" }, { "from": "gpt", "value": "The answer is 130 degrees" } ], "clip_score": 32.03125, "loss": 1.4171568155288696 }, { "id": "identity_333414", "image": "UniGeo/images/729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点A、B、C在圆O上,∠AOB=30°,则∠ACB的度数是()\n选项:\n(A) 10°\n(B) 15°\n(C) 40°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.78125, "loss": 2.6080658435821533 }, { "id": "identity_332027", "image": "A-OKVQA/images/271011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they all wearing helmets?\nChoices:\n(A) accident safety\n(B) trendy\n(C) warmth\n(D) sunblock" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.84375, "loss": 4.216921329498291 }, { "id": "identity_283718", "image": "A-OKVQA/images/186175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number of exhaust pipes that protrude from the sides of the large semi-truck shown above?\nChoices:\n(A) five\n(B) seven\n(C) two\n(D) three" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.421875, "loss": 3.3270668983459473 }, { "id": "identity_161136", "image": "UniGeo/images/1187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of triangle ABC?" }, { "from": "gpt", "value": "The answer is 12+6√2" } ], "clip_score": 30.78125, "loss": 2.7991979122161865 }, { "id": "identity_327944", "image": "AI2D/images/3529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part is referred to as the protoplasm outside the nucleus of a cell?\nChoices:\n(A) I\n(B) J\n(C) B\n(D) F" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.890625, "loss": 4.13194465637207 }, { "id": "identity_100648", "image": "IconQA/images/51649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many circles are intersected by the line?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.15625, "loss": 2.579725980758667 }, { "id": "identity_103651", "image": "VQA-AS/images/12507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is depicted in the image?" }, { "from": "gpt", "value": "The answer is Wooden" } ], "clip_score": 26.265625, "loss": 3.3016889095306396 }, { "id": "identity_208183", "image": "VQA2.0/images/292146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are parked in the second row from the left?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 30.1875, "loss": 2.467003583908081 }, { "id": "identity_259972", "image": "DocVQA/images/ylng0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration in seconds for which the transmission was active, and what was the start time of the transmission in 24-hour format?" }, { "from": "gpt", "value": "The answer is 27 seconds, 12:40" } ], "clip_score": 20.46875, "loss": 2.430497884750366 }, { "id": "identity_218094", "image": "Super-CLEVR/images/superCLEVR_new_001994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are airborne?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.15625, "loss": 2.581451416015625 }, { "id": "identity_44525", "image": "DVQA/images/bar_train_00154141.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'meat' and 'cost' datasets for the 'corps' algorithm?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 36.78125, "loss": 3.422041654586792 }, { "id": "identity_72119", "image": "AI2D/images/602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the stage that comes after LARVA in the life cycle?" }, { "from": "gpt", "value": "The answer is PUPA" } ], "clip_score": 34.25, "loss": 1.6997102499008179 }, { "id": "identity_4820", "image": "PMC-VQA/images/PMC6484689_Fig2_471738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining method was used in the image? \nChoices:\n(A) Black and white staining \n(B) Hematoxylin and eosin staining \n(C) Masson's trichrome staining \n(D) Immunofluorescence staining " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.34375, "loss": 3.9225196838378906 }, { "id": "identity_139470", "image": "DVQA/images/bar_train_00167366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label has the highest total value of bars in the graph?" }, { "from": "gpt", "value": "The answer is heat\" with a total value of 24" } ], "clip_score": 22.359375, "loss": 2.8646814823150635 }, { "id": "identity_59827", "image": "FigureQA/images/83048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Orchid greater than Steel Blue at x-axis label 60?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.59375, "loss": 3.358928680419922 }, { "id": "identity_157428", "image": "ScienceQA/images/5319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the final temperature for both the uncovered and covered aquariums?" }, { "from": "gpt", "value": "The answer is 16.2°C" } ], "clip_score": 32.5, "loss": 1.8537955284118652 }, { "id": "identity_324876", "image": "ScienceQA/images/9170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likelihood that an offspring from this genetic cross will have red eyes?\nChoices:\n(A) 4/4\n(B) 0/4\n(C) 2/4\n(D) 1/4\n(E) 3/4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.6875, "loss": 3.3030178546905518 }, { "id": "identity_116071", "image": "A-OKVQA/images/402228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red cups can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 17.640625, "loss": 4.363745212554932 }, { "id": "identity_280438", "image": "ChartQA/images/two_col_24382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of people are satisfied to some extent?" }, { "from": "gpt", "value": "The answer is 84" } ], "clip_score": 29.109375, "loss": 2.9885125160217285 }, { "id": "identity_26374", "image": "ScienceQA/images/3341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the colony shown?\nChoices:\n(A) Massachusetts\n(B) South Carolina\n(C) Connecticut\n(D) Maine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.234375, "loss": 4.6957902908325195 }, { "id": "identity_76004", "image": "TQA/images/abc_question_images/parts_telescope_17287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label A represent in the diagram?" }, { "from": "gpt", "value": "The answer is Telescope tube" } ], "clip_score": 28.71875, "loss": 2.6919589042663574 }, { "id": "identity_270657", "image": "Geometry3K/images/344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the area of the kite from the given choices: (A) 42, (B) 52.5, (C) 54, (D) 63." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.109375, "loss": 2.7899155616760254 }, { "id": "identity_118906", "image": "VQA2.0/images/34777.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other elephants visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.53125, "loss": 3.1099889278411865 }, { "id": "identity_303518", "image": "ChartQA/images/multi_col_978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the population is in this age group in 2019?" }, { "from": "gpt", "value": "The answer is 15.28" } ], "clip_score": 28.875, "loss": 2.1807775497436523 }, { "id": "identity_28325", "image": "MapQA/images/map_10151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Arkansas ?" }, { "from": "gpt", "value": "The answer is 47.2%-53.0%" } ], "clip_score": 25.9375, "loss": 2.4507505893707275 }, { "id": "identity_104625", "image": "TextVQA/images/b4964b5351dc4fc2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the drinking fountain?" }, { "from": "gpt", "value": "The answer is Elkay" } ], "clip_score": 31.078125, "loss": 2.8131985664367676 }, { "id": "identity_310927", "image": "DVQA/images/bar_train_00168835.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories have at least one bar with a value greater than 9?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 21.546875, "loss": 3.963198184967041 }, { "id": "identity_316394", "image": "VQA2.0/images/267697.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many creatures are present here?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 18.921875, "loss": 1.7901651859283447 }, { "id": "identity_234233", "image": "GeoQA+/images/1912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,ABCD的对角线AC、BD相交于点O,点E是CD的中点,若AD=6,且AB=8,求四边形ABCD的面积。" }, { "from": "gpt", "value": "The answer is 12√7" } ], "clip_score": 25.25, "loss": 2.711679220199585 }, { "id": "identity_307730", "image": "DVQA/images/bar_train_00067929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest accuracy summed across these datasets?" }, { "from": "gpt", "value": "The answer is pair" } ], "clip_score": 33.84375, "loss": 4.326054573059082 }, { "id": "identity_88442", "image": "Geometry3K/images/2054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment AD in triangle ADC?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 32.65625, "loss": 2.4283502101898193 }, { "id": "identity_243389", "image": "A-OKVQA/images/160968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the metal shapes attached to the front of the horse's breast collar, and how do they relate to the uniforms worn by the riders?" }, { "from": "gpt", "value": "The answer is They indicate the horses are part of a police unit" } ], "clip_score": 27.25, "loss": 2.988097667694092 }, { "id": "identity_4063", "image": "GeoQA+/images/5988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知AB、AD是⊙O的弦,∠B=30°,点C在弦AB上,连接CO并延长CO交于⊙O于点D,∠D=20°,则∠BAD的度数是()\nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.90625, "loss": 2.593315601348877 }, { "id": "identity_300249", "image": "FigureQA/images/73942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.4453125, "loss": 4.619732856750488 }, { "id": "identity_222810", "image": "TextVQA/images/5a9be206764084b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant type of liquor displayed on the shelf?" }, { "from": "gpt", "value": "The answer is Rum" } ], "clip_score": 32.1875, "loss": 3.9055838584899902 }, { "id": "identity_26006", "image": "DocVQA/images/xtdw0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Dr.J. H. Reynolds, IV was working as?" }, { "from": "gpt", "value": "The answer is Principal Scient/Mgr" } ], "clip_score": 22.375, "loss": 3.8944149017333984 }, { "id": "identity_270464", "image": "PMC-VQA/images/PMC7057551_Fig3_129314.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining method was used on the specimen, with the options being crystal violet-amaranth, Wright-Giemsa, hematoxylin-eosin, and safranin-fast green?\nChoices:\n(A) crystal violet-amaranth \n(B) Wright-Giemsa \n(C) hematoxylin-eosin \n(D) safranin-fast green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.09375, "loss": 3.1765708923339844 }, { "id": "identity_221537", "image": "VQA2.0/images/98631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas are depicted in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 26.3125, "loss": 2.3250162601470947 }, { "id": "identity_216090", "image": "PMC-VQA/images/PMC7880958_Fig2_367099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of organ displacement due to the mass?" }, { "from": "gpt", "value": "The answer is Yes, left kidney is displaced posteriorly due to the mass" } ], "clip_score": 26.265625, "loss": 2.434088945388794 }, { "id": "identity_236197", "image": "DocVQA/images/jybx0223_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific measures did the company implement for energy conservation and technological absorption that contributed to their outstanding achievement in environment management?" }, { "from": "gpt", "value": "The answer is Energy conservation and technological innovation measures" } ], "clip_score": 18.109375, "loss": 3.430342435836792 }, { "id": "identity_267565", "image": "IconQA/images/52251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of dots in each of the three equal groups that the model with 6 dots is divided into?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.0, "loss": 2.033947706222534 }, { "id": "identity_9493", "image": "AI2D/images/2046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram, which of the following can be considered producers?\nChoices:\n(A) Sugar Cane Plants, Wild Plants\n(B) Beetle, Grasshopper\n(C) Sugar Cane Plants, Snail\n(D) Weevil, Mouse" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.46875, "loss": 3.4937851428985596 }, { "id": "identity_109088", "image": "A-OKVQA/images/445928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more umbrellas with a single color or with patterns?" }, { "from": "gpt", "value": "The answer is Single color" } ], "clip_score": 26.59375, "loss": 3.0888805389404297 }, { "id": "identity_231047", "image": "VizWiz/images/VizWiz_train_00015153.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the manufacturing date of this product and how many days are left until it expires if the shelf life is 365 days from the manufacturing date?" }, { "from": "gpt", "value": "The answer is 91 days" } ], "clip_score": 22.6875, "loss": 2.792144536972046 }, { "id": "identity_267556", "image": "Super-CLEVR/images/superCLEVR_new_001425.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the object located to the right of the gray shiny bicycle and to the left of the big rubber fighter?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 24.6875, "loss": 4.325833320617676 }, { "id": "identity_320425", "image": "DVQA/images/bar_train_00080004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there stacked bars in this chart?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 26.796875, "loss": 3.6766605377197266 }, { "id": "identity_226300", "image": "Super-CLEVR/images/superCLEVR_new_000563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in the image, and what is the total number of wheels visible on all the objects combined?" }, { "from": "gpt", "value": "The answer is 5 objects, 24 wheels" } ], "clip_score": 23.515625, "loss": 2.17331600189209 }, { "id": "identity_14340", "image": "ScienceQA/images/15056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In this food chain, the midge larva is a primary consumer. Why?\nChoices:\n(A) It eats a secondary consumer.\n(B) It eats a producer.\n(C) It eats a primary consumer." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.625, "loss": 3.750959873199463 }, { "id": "identity_121462", "image": "TextVQA/images/74f3ef4c334bf357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the event shown on the screen?" }, { "from": "gpt", "value": "The answer is Megapolis 2025" } ], "clip_score": 20.609375, "loss": 3.597151756286621 }, { "id": "identity_144067", "image": "MapQA/images/map_10955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of benefits in Florida greater than 2609792?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.890625, "loss": 3.1384594440460205 }, { "id": "identity_286674", "image": "A-OKVQA/images/107535.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason behind the woman pushing the box?\nChoices:\n(A) cross street\n(B) contact police\n(C) get cab\n(D) get help" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.625, "loss": 4.16256856918335 }, { "id": "identity_117813", "image": "VizWiz/images/VizWiz_train_00018727.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the battery percentage displayed on the screen?" }, { "from": "gpt", "value": "The answer is 80%" } ], "clip_score": 21.515625, "loss": 1.7529377937316895 }, { "id": "identity_314956", "image": "GeoQA+/images/4713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,把一块含有45°角的直角三角板的两个顶点放在直尺的对边上,如果∠1=20°,那么∠2等于多少?\nChoices:\n(A) 30°\n(B) 25°\n(C) 20°\n(D) 15°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.390625, "loss": 2.4149277210235596 }, { "id": "identity_162114", "image": "UniGeo/images/1250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment OM when circle O touches line segment AM?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.75, "loss": 2.633582830429077 }, { "id": "identity_299577", "image": "UniGeo/images/994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, AB is the diameter of circle O, and ∠CAB is 40°. What is the measure of angle D?\nChoices:\n(A) 20°\n(B) 30°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.3125, "loss": 2.3651671409606934 }, { "id": "identity_106458", "image": "A-OKVQA/images/554285.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person wearing any wrist accessories?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.5, "loss": 3.253901243209839 }, { "id": "identity_150317", "image": "ScienceQA/images/11626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 24.96875, "loss": 2.805143117904663 }, { "id": "identity_4525", "image": "IconQA/images/69152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many airplanes are there?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 31.78125, "loss": 2.00715970993042 }, { "id": "identity_238285", "image": "DocVQA/images/xfhb0228_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which dates and at what times are the registration sessions scheduled?" }, { "from": "gpt", "value": "The answer is October 1 at 5:00-8:00 p.m., October 2 at 8:00-10:00 a.m" } ], "clip_score": 22.859375, "loss": 1.2520674467086792 }, { "id": "identity_276693", "image": "AI2D/images/527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does stage H represent in the diagram?\nChoices:\n(A) metamorphic rock\n(B) sediments\n(C) sedimentary rock\n(D) igneous rock" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.40625, "loss": 4.158682823181152 }, { "id": "identity_201767", "image": "VQA2.0/images/410123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the model of the truck?" }, { "from": "gpt", "value": "The answer is Ram 1500" } ], "clip_score": 28.96875, "loss": 1.6734600067138672 }, { "id": "identity_232475", "image": "VizWiz/images/VizWiz_train_00014605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the water bottle, and what objects are located to the left and right of it?" }, { "from": "gpt", "value": "The answer is Red, tape dispenser, black circular object with carabiner" } ], "clip_score": 27.84375, "loss": 3.8010129928588867 }, { "id": "identity_320515", "image": "A-OKVQA/images/178666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: When does this exhibit end?\nChoices:\n(A) 2010\n(B) 2011\n(C) 2012\n(D) 2013" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.421875, "loss": 3.360913038253784 }, { "id": "identity_124139", "image": "VQA-RAD/images/synpic21044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this X-ray show any signs of a hiatal hernia?" }, { "from": "gpt", "value": "The answer is No signs of a hiatal hernia" } ], "clip_score": 29.609375, "loss": 2.2421998977661133 }, { "id": "identity_302416", "image": "VQA-RAD/images/synpic21604.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which liver lobe is the lesion located?" }, { "from": "gpt", "value": "The answer is Right lobe" } ], "clip_score": 26.53125, "loss": 2.372196912765503 }, { "id": "identity_78207", "image": "AI2D/images/4178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of rock is formed directly from magma according to the image?" }, { "from": "gpt", "value": "The answer is Granite, diorite, gabbro" } ], "clip_score": 26.40625, "loss": 2.3789992332458496 }, { "id": "identity_268038", "image": "VQA2.0/images/68656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of cows shown in the picture?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 21.84375, "loss": 2.051858425140381 }, { "id": "identity_101187", "image": "IconQA/images/76176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more pillows are needed so that each sleeping bag has one?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 36.1875, "loss": 2.716526985168457 }, { "id": "identity_214353", "image": "PMC-VQA/images/PMC7007391_Figure1_113430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is used to obtain these images?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT) scans" } ], "clip_score": 29.53125, "loss": 1.4968515634536743 }, { "id": "identity_257997", "image": "PMC-VQA/images/PMC7042148_ytz182-F1_124673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which chamber of the heart shows metastasis on the echocardiogram, and what are the key anatomical features visible in this chamber that help identify it?" }, { "from": "gpt", "value": "The answer is Right ventricle, tricuspid valve, moderator band, triangular shape" } ], "clip_score": 33.6875, "loss": 2.79945969581604 }, { "id": "identity_254727", "image": "Geometry3K/images/1789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Quadrilateral J K L M is a rectangle. If m ∠KJL = 2x + 4 and m ∠JLK = 7x + 5, find the measure of ∠JPL" }, { "from": "gpt", "value": "The answer is 34 degrees" } ], "clip_score": 29.828125, "loss": 2.5161705017089844 }, { "id": "identity_237791", "image": "ScienceQA/images/11623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the primary producer, primary consumer, secondary consumer, and tertiary consumer in the food web, and describe the energy flow from the primary producer to the tertiary consumer" }, { "from": "gpt", "value": "The answer is Bilberry, Brown lemming, Arctic fox, Rough-legged hawk" } ], "clip_score": 27.578125, "loss": 2.3223965167999268 }, { "id": "identity_161126", "image": "GeoQA+/images/3458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠CED = 50° and the triangle board has a 90° angle, what is the measure of ∠ECD?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 33.5, "loss": 1.9700169563293457 }, { "id": "identity_78614", "image": "AI2D/images/3701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled directly above the stomach?" }, { "from": "gpt", "value": "The answer is Pharynx" } ], "clip_score": 31.203125, "loss": 3.222982168197632 }, { "id": "identity_182099", "image": "PMC-VQA/images/PMC3251536_F1_121144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the lesion be characterized as solid or cystic based on the image?" }, { "from": "gpt", "value": "The answer is Cystic lesion" } ], "clip_score": 24.9375, "loss": 2.33780574798584 }, { "id": "identity_252239", "image": "IconQA/images/30464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, what is the probability that at least one of them will be black?" }, { "from": "gpt", "value": "The answer is 0.4" } ], "clip_score": 27.78125, "loss": 2.6351046562194824 }, { "id": "identity_334458", "image": "VQA-AS/images/14630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there something casting a shadow?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.359375, "loss": 3.6284332275390625 }, { "id": "identity_216409", "image": "Super-CLEVR/images/superCLEVR_new_001264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the position of the purple airplane relative to the cyan airplane?" }, { "from": "gpt", "value": "The answer is Above and to the right" } ], "clip_score": 26.125, "loss": 2.6912007331848145 }, { "id": "identity_305072", "image": "ChartQA/images/24098382002241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the largest and smallest bars?" }, { "from": "gpt", "value": "The answer is 7.01" } ], "clip_score": 10.3203125, "loss": 2.5015318393707275 }, { "id": "identity_122062", "image": "VQA-AS/images/17433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the train on the floor?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.984375, "loss": 2.9293837547302246 }, { "id": "identity_103173", "image": "TabMWP/images/551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bus trips are there from the doctor's office to the playground before noon?" }, { "from": "gpt", "value": "The answer is 2 trips" } ], "clip_score": 28.296875, "loss": 2.3443379402160645 }, { "id": "identity_89210", "image": "UniGeo/images/133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if the length of AB is 10 and the length of AG is 8, what is the ratio of the area of triangle ABG to the area of triangle AEG?" }, { "from": "gpt", "value": "The answer is (8/5)^2 or 64/25" } ], "clip_score": 32.0625, "loss": 1.6643297672271729 }, { "id": "identity_81312", "image": "Geometry3K/images/1975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the parallelogram?" }, { "from": "gpt", "value": "The answer is 38.4 inches" } ], "clip_score": 37.3125, "loss": 1.8502784967422485 }, { "id": "identity_14316", "image": "TabMWP/images/21748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each jar has 5 pickles. How many pickles are in 6 jars?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 32.25, "loss": 1.4899052381515503 }, { "id": "identity_258293", "image": "DocVQA/images/fqhm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total amount enclosed in the check mentioned in the letter, and what is the designation of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is $40.75 and Director and Executive Secretary" } ], "clip_score": 22.890625, "loss": 3.265774726867676 }, { "id": "identity_199668", "image": "VQA2.0/images/42619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the traffic light facing the pedestrians?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.328125, "loss": 2.8468680381774902 }, { "id": "identity_302607", "image": "GeoQA+/images/4489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在四边形ABCD中,AB=1,BC=1,CD=2,DA=√{6},且∠ABC=90°,则这个四边形的面积是()\nChoices:\n(A) 2\n(B) 0.5+√{2}\n(C) 1+√{2}\n(D) \\frac{1+√{2}}{2}" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.09375, "loss": 2.429579973220825 }, { "id": "identity_273673", "image": "GeoQA+/images/5986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, given that the line segment AB = 12, extend the line segment AB to point C such that BC = 1/2 AB, point D is the midpoint of line segment AC, what is the length of line segment BD?\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.109375, "loss": 2.366442918777466 }, { "id": "identity_184438", "image": "PMC-VQA/images/PMC6692028_pone.0220922.g001_22949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Horizontal cross-section" } ], "clip_score": 31.40625, "loss": 4.1066155433654785 }, { "id": "identity_137459", "image": "DocVQA/images/frld0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which meetings will the CME programs be held at?" }, { "from": "gpt", "value": "The answer is ADA, DNE, and APS meeting" } ], "clip_score": 24.828125, "loss": 1.9776802062988281 }, { "id": "identity_64790", "image": "ScienceQA/images/8881.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible damage or alteration to the penny in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is a hole" } ], "clip_score": 34.03125, "loss": 3.0340640544891357 }, { "id": "identity_176862", "image": "TabMWP/images/14960.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the vendor aims to reduce the number of leftover loaves by 10% each day starting from Tuesday, did they achieve this goal on Wednesday?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 42.65625, "loss": 3.048699140548706 }, { "id": "identity_171596", "image": "IconQA/images/98821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the image represents a pair, how many pairs are there?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.234375, "loss": 2.2167603969573975 }, { "id": "identity_75676", "image": "ScienceQA/images/20898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Nebraska?" }, { "from": "gpt", "value": "The answer is Kansas" } ], "clip_score": 30.984375, "loss": 2.7777082920074463 }, { "id": "identity_132700", "image": "PMC-VQA/images/PMC5823628_F2_279407.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many weeks post-treatment is this MRI scan taken?" }, { "from": "gpt", "value": "The answer is 6 weeks" } ], "clip_score": 35.28125, "loss": 1.802410364151001 }, { "id": "identity_35186", "image": "GeoQA+/images/3637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,∠AOB=20°,∠AOC=90°,点B、O、D在同一直线上,则∠COD的度数为()\nChoices:\n(A) 100°\n(B) 105°\n(C) 110°\n(D) 115°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.40625, "loss": 2.5593292713165283 }, { "id": "identity_275646", "image": "FigureQA/images/80142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Navy Blue and Medium Periwinkle intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.6875, "loss": 3.7716705799102783 }, { "id": "identity_323969", "image": "DocVQA/images/frld0217_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this called?" }, { "from": "gpt", "value": "The answer is Thought Leader Mentorship Program" } ], "clip_score": 24.859375, "loss": 1.9931718111038208 }, { "id": "identity_110965", "image": "PMC-VQA/images/PMC5880870_Fig2_296385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct red areas can be counted in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 23.015625, "loss": 4.5978169441223145 }, { "id": "identity_149572", "image": "DocVQA/images/jtvd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many members are in the Executive Committee?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 18.71875, "loss": 2.3138723373413086 }, { "id": "identity_278759", "image": "Geometry3K/images/995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the measure of angle DGF is 53 degrees and the measure of angle AGC is 40 degrees, what is the measure of angle 3?\nChoices:\n(A) 40\n(B) 47\n(C) 50\n(D) 53" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.1875, "loss": 2.4944281578063965 }, { "id": "identity_253992", "image": "ScienceQA/images/8995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what percentage is the volume of Uranus greater than the volume of Neptune?" }, { "from": "gpt", "value": "The answer is approximately 9.28%" } ], "clip_score": 25.734375, "loss": 2.4296486377716064 }, { "id": "identity_336424", "image": "Super-CLEVR/images/superCLEVR_new_000706.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the matte thing to the left of the big matte object the same color as the metallic object behind the small metal biplane?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 25.125, "loss": 5.1648054122924805 }, { "id": "identity_197343", "image": "VQA2.0/images/386390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animals are grazing in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Sheep" } ], "clip_score": 29.421875, "loss": 2.4322450160980225 }, { "id": "identity_137482", "image": "DVQA/images/bar_train_00129903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value associated with 'bar' on the chart?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 29.78125, "loss": 2.989534378051758 }, { "id": "identity_136867", "image": "PlotQA/images/49906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country has the highest value added by the services sector?" }, { "from": "gpt", "value": "The answer is South Africa" } ], "clip_score": 30.515625, "loss": 2.2619104385375977 }, { "id": "identity_172865", "image": "IconQA/images/44845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one marble is removed, what is the probability of picking a purple marble?" }, { "from": "gpt", "value": "The answer is 0.75" } ], "clip_score": 33.625, "loss": 2.070124387741089 }, { "id": "identity_155385", "image": "ScienceQA/images/6093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the webbed structure observed in the animal's feet?" }, { "from": "gpt", "value": "The answer is Aiding in swimming" } ], "clip_score": 21.5, "loss": 3.5078179836273193 }, { "id": "identity_274092", "image": "TextVQA/images/1a6a82e6106c75a6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the current time?" }, { "from": "gpt", "value": "The answer is 10:10" } ], "clip_score": 17.8125, "loss": 1.793304681777954 }, { "id": "identity_47291", "image": "FigureQA/images/16325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Bubblegum section represent more than a quarter of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.125, "loss": 2.9540627002716064 }, { "id": "identity_124124", "image": "TextVQA/images/6abf68c8b3f0f606.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What landmark is shown in the bottom right image within the letter \"N\"?" }, { "from": "gpt", "value": "The answer is State Capitol in Austin" } ], "clip_score": 21.125, "loss": 2.40495228767395 }, { "id": "identity_147268", "image": "DVQA/images/bar_train_00138552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many outliers are present in the 'mood' dataset?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.515625, "loss": 2.3287980556488037 }, { "id": "identity_206838", "image": "VizWiz/images/VizWiz_train_00019004.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the mug?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 25.359375, "loss": 3.426032304763794 }, { "id": "identity_289213", "image": "DocVQA/images/kkxw0023_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the agency?" }, { "from": "gpt", "value": "The answer is long haymes carr lintas" } ], "clip_score": 21.296875, "loss": 5.940708160400391 }, { "id": "identity_29626", "image": "PMC-VQA/images/PMC7025095_fig0005_119221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the CT scan show? \nChoices:\n(A) evidence of inguinal hernia \n(B) retroperitoneal hematoma \n(C) evidence of both inguinal hernia and retroperitoneal hematoma \n(D) none of the above. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.859375, "loss": 4.526888370513916 }, { "id": "identity_184774", "image": "VQA2.0/images/435454.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the departing train?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.765625, "loss": 3.6750106811523438 }, { "id": "identity_110248", "image": "VizWiz/images/VizWiz_train_00000593.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the ink used for the printing on the can?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 29.203125, "loss": 3.3826541900634766 }, { "id": "identity_54724", "image": "FigureQA/images/63070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value of Medium Turquoise on the y-axis label?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 26.828125, "loss": 2.3183059692382812 }, { "id": "identity_106533", "image": "TextVQA/images/129ae867a15bb330.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation is depicted in the image?" }, { "from": "gpt", "value": "The answer is Horse-drawn carriages and a train" } ], "clip_score": 22.5, "loss": 2.473536729812622 }, { "id": "identity_323878", "image": "TabMWP/images/296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy a cockatoo and a pot-bellied pig?" }, { "from": "gpt", "value": "The answer is 73" } ], "clip_score": 36.125, "loss": 3.137827157974243 }, { "id": "identity_142062", "image": "DVQA/images/bar_train_00166005.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the category labeled \"health\"?" }, { "from": "gpt", "value": "The answer is Between 10^5 and 10^7" } ], "clip_score": 21.953125, "loss": 2.46285080909729 }, { "id": "identity_227383", "image": "AI2D/images/4212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain the process of how CO2 from the atmosphere eventually contributes to the formation of carbonate rocks on the oceanic plate, using the diagram" }, { "from": "gpt", "value": "The answer is CO2 forms carbonic acid, weathers silicates, deposits as CaCO3" } ], "clip_score": 32.15625, "loss": 2.888313055038452 }, { "id": "identity_230529", "image": "VQA2.0/images/59080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image, and what is the color of the umbrella held by the person sitting on the chair?" }, { "from": "gpt", "value": "The answer is 12 people, white umbrella" } ], "clip_score": 26.9375, "loss": 2.194140911102295 }, { "id": "identity_5311", "image": "AI2D/images/2236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Letter E in the following diagram corresponds to which stage in the life cycle?\nChoices:\n(A) young larva\n(B) eggs\n(C) pupa in chrysalis\n(D) older larva" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.875, "loss": 4.747494697570801 }, { "id": "identity_45411", "image": "FigureQA/images/22288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a legend that explains the colors in the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.6875, "loss": 2.9566032886505127 }, { "id": "identity_75982", "image": "AI2D/images/1813.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is a common prey for both the Lynx and the Weasel?" }, { "from": "gpt", "value": "The answer is Amphibians" } ], "clip_score": 29.40625, "loss": 2.7884132862091064 }, { "id": "identity_80806", "image": "GeoQA+/images/3782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AD is the bisector of ∠CAE, ∠B = 45°, and ∠DAE = 75°, what is the measure of ∠ABD?" }, { "from": "gpt", "value": "The answer is 52.5°" } ], "clip_score": 32.71875, "loss": 1.6427701711654663 }, { "id": "identity_251769", "image": "DocVQA/images/pkbd0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document and the location of the meeting?" }, { "from": "gpt", "value": "The answer is National Institute of Arthritis and Metabolic Diseases, Atlantic City" } ], "clip_score": 26.34375, "loss": 2.168630599975586 }, { "id": "identity_275719", "image": "DVQA/images/bar_train_00182810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the worth of the agreement in essence?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 18.421875, "loss": 3.110903024673462 }, { "id": "identity_42535", "image": "FigureQA/images/50627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sum of Midnight Blue and Bubblegum's proportions greater than 75%?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.40625, "loss": 3.3999054431915283 }, { "id": "identity_211232", "image": "PMC-VQA/images/PMC7779423_fig0009_333740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many color-coded directional lines are visible in the enlarged region?" }, { "from": "gpt", "value": "The answer is 22 lines" } ], "clip_score": 22.53125, "loss": 2.68017315864563 }, { "id": "identity_138029", "image": "PlotQA/images/4997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest percentage of child employment for females attending school?" }, { "from": "gpt", "value": "The answer is 80%" } ], "clip_score": 35.0, "loss": 2.2513599395751953 }, { "id": "identity_321230", "image": "TextVQA/images/1c704d0742d08a73.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which soda brand is being advertised here?" }, { "from": "gpt", "value": "The answer is coca cola" } ], "clip_score": 16.15625, "loss": 2.073061943054199 }, { "id": "identity_591", "image": "Super-CLEVR/images/superCLEVR_new_000108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the rubber car and the shiny road bike in front of the large blue matte aeroplane have the same color?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 28.203125, "loss": 5.363919258117676 }, { "id": "identity_175951", "image": "TabMWP/images/2431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hammers sold in the past 5 days?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 39.65625, "loss": 2.154569387435913 }, { "id": "identity_140090", "image": "DocVQA/images/pkhd0227_27.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the event for which the document was prepared?" }, { "from": "gpt", "value": "The answer is Community Conference on Food and Population" } ], "clip_score": 18.5625, "loss": 3.1366121768951416 }, { "id": "identity_131967", "image": "VQA2.0/images/1756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any beverage cans on the table in the foreground?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.09375, "loss": 3.2246642112731934 }, { "id": "identity_11374", "image": "TabMWP/images/2791.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Joe kept track of the number of characters in each book he read. How many books had at least 2 characters?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 22.21875, "loss": 2.2514615058898926 }, { "id": "identity_292882", "image": "VQA2.0/images/442774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current time displayed on the clock?" }, { "from": "gpt", "value": "The answer is 5:50" } ], "clip_score": 15.640625, "loss": 1.8314671516418457 }, { "id": "identity_169612", "image": "TabMWP/images/10274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of hours slept by Aisha during the 7 days?" }, { "from": "gpt", "value": "The answer is No mode" } ], "clip_score": 27.34375, "loss": 4.321966171264648 }, { "id": "identity_137002", "image": "DVQA/images/bar_train_00105570.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first stack of bars from the top?" }, { "from": "gpt", "value": "The answer is start" } ], "clip_score": 26.046875, "loss": 4.1414923667907715 }, { "id": "identity_111688", "image": "Super-CLEVR/images/superCLEVR_new_001533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 23.734375, "loss": 2.125324249267578 }, { "id": "identity_141171", "image": "PlotQA/images/85432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Uruguay see the highest investment in telecom according to the graph?" }, { "from": "gpt", "value": "The answer is 2008" } ], "clip_score": 32.25, "loss": 1.5905508995056152 }, { "id": "identity_69909", "image": "ScienceQA/images/2034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the arrowhead have any visible texture or pattern?" }, { "from": "gpt", "value": "The answer is Yes, wavy lines and shaded texture" } ], "clip_score": 33.3125, "loss": 3.9217934608459473 }, { "id": "identity_129943", "image": "PMC-VQA/images/PMC5579036_Fig2_210075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification scale indicated at the bottom of the TEM image?" }, { "from": "gpt", "value": "The answer is 500nm" } ], "clip_score": 31.4375, "loss": 1.8204193115234375 }, { "id": "identity_24072", "image": "PMC-VQA/images/PMC5358222_F7_142636.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which book is the image taken from? \nChoices:\n(A) Gray's Anatomy \n(B) Nelson's Pediatrics \n(C) Robbins Pathology \n(D) Harrison's Internal Medicine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.703125, "loss": 5.292337417602539 }, { "id": "identity_258146", "image": "TabMWP/images/6607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of cars in all parking lots that had at least 42 cars but fewer than 55 cars?" }, { "from": "gpt", "value": "The answer is 345" } ], "clip_score": 27.109375, "loss": 2.147779941558838 }, { "id": "identity_230559", "image": "IconQA/images/6457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of shapes (triangles and rectangles) on the frame?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 32.8125, "loss": 1.870409607887268 }, { "id": "identity_122426", "image": "PMC-VQA/images/PMC8798700_F1_179686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the lesion indicated by the arrow in centimeters?" }, { "from": "gpt", "value": "The answer is The size in centimeters based on the scale" } ], "clip_score": 22.328125, "loss": 3.5180163383483887 }, { "id": "identity_158438", "image": "ScienceQA/images/6555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's plumage?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 27.078125, "loss": 3.9700942039489746 }, { "id": "identity_7413", "image": "PMC-VQA/images/PMC2844753_F0004_60335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What plane of the body is the image showing? \nChoices:\n(A) Transverse plane \n(B) Frontal plane \n(C) Sagittal plane \n(D) Coronal plane " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.953125, "loss": 4.2350358963012695 }, { "id": "identity_22305", "image": "TextVQA/images/a6b021a5dc855988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what shoe company is advertised here?" }, { "from": "gpt", "value": "The answer is adidas" } ], "clip_score": 19.09375, "loss": 2.4722867012023926 }, { "id": "identity_71720", "image": "AI2D/images/3284.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"Golgi apparatus\"?" }, { "from": "gpt", "value": "The answer is Modifying, sorting, packaging proteins and lipids" } ], "clip_score": 27.46875, "loss": 2.032733917236328 }, { "id": "identity_270493", "image": "ChartQA/images/multi_col_101066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio between the smallest light blue bar and the largest green bar, expressed as A:B?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 17.6875, "loss": 2.7870407104492188 }, { "id": "identity_96947", "image": "TabMWP/images/13991.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of leftover loaves for the entire week?" }, { "from": "gpt", "value": "The answer is 149" } ], "clip_score": 42.3125, "loss": 2.205345630645752 }, { "id": "identity_319672", "image": "MapQA/images/map_10611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have the highest value in the West?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.015625, "loss": 2.7242393493652344 }, { "id": "identity_167762", "image": "TabMWP/images/9072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what price does the quantity demanded equal the quantity supplied?" }, { "from": "gpt", "value": "The answer is $1,295" } ], "clip_score": 30.46875, "loss": 1.9448944330215454 }, { "id": "identity_136841", "image": "FigureQA/images/72147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines on the graph have a downward trend between the x-axis label values of 60 and 80?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.203125, "loss": 2.1958494186401367 }, { "id": "identity_73380", "image": "AI2D/images/1332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the respiratory system is directly connected to the mouth?" }, { "from": "gpt", "value": "The answer is Trachea" } ], "clip_score": 31.234375, "loss": 1.9366414546966553 }, { "id": "identity_153924", "image": "ScienceQA/images/17263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the only non-food item that both Chad and Warren want?" }, { "from": "gpt", "value": "The answer is water" } ], "clip_score": 30.96875, "loss": 2.984635353088379 }, { "id": "identity_174647", "image": "TabMWP/images/2281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Akira wants to buy 3 trailers, how much will it cost him?" }, { "from": "gpt", "value": "The answer is $18,948.00" } ], "clip_score": 25.5, "loss": 1.6560077667236328 }, { "id": "identity_110300", "image": "TextVQA/images/1cda533fa02b6f8f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the network signal strength shown on the phone?" }, { "from": "gpt", "value": "The answer is 3 bars" } ], "clip_score": 22.40625, "loss": 2.696681499481201 }, { "id": "identity_276133", "image": "TextVQA/images/23733e0799cc2200.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the brand of this product?" }, { "from": "gpt", "value": "The answer is android" } ], "clip_score": 30.5625, "loss": 3.0946788787841797 }, { "id": "identity_336546", "image": "PMC-VQA/images/PMC7686009_Fig9_304362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific part of the pelvis shows retroversion in this image?\nChoices:\n(A) Sacrum\n(B) Coccyx\n(C) Left Acetabulum\n(D) Right Acetabulum" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.359375, "loss": 4.1100077629089355 }, { "id": "identity_196688", "image": "PMC-VQA/images/PMC8870832_diagnostics-12-00306-f005_203038.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure labeled as 'a' in this image?" }, { "from": "gpt", "value": "The answer is Right lung affected area" } ], "clip_score": 30.8125, "loss": 5.065240383148193 }, { "id": "identity_241051", "image": "DVQA/images/bar_train_00012223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of people who preferred 'wind' and 'lead' in the sixth group of bars from the left?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 20.171875, "loss": 1.804762601852417 }, { "id": "identity_139626", "image": "FigureQA/images/39638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented by shades of red?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.84375, "loss": 2.4004859924316406 }, { "id": "identity_328411", "image": "VQA2.0/images/63681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What gas station is visible in the image?" }, { "from": "gpt", "value": "The answer is texaco" } ], "clip_score": 13.5234375, "loss": 3.164550542831421 }, { "id": "identity_74605", "image": "AI2D/images/2984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the location of the Central canal?" }, { "from": "gpt", "value": "The answer is Central canal" } ], "clip_score": 19.75, "loss": 2.4253792762756348 }, { "id": "identity_19517", "image": "UniGeo/images/345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,一块直角三角板的30°角的顶点P落在⊙O上,两边分别交⊙O于A、B两点,若⊙O的直径为4,则弦AB长为()\nChoices:\n(A) 2\n(B) 3\n(C) √{2}\n(D) √{3}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.203125, "loss": 2.7551345825195312 }, { "id": "identity_230548", "image": "UniGeo/images/1915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,∠C=90°,∠B=20°,以A为圆心,任意长为半径画弧分别交AB于点M和N,再分别以M、N为圆心,大于MN的长为半径画弧,两弧交于P,连接AP并延长交于点D,若AB=10,求AD的长" }, { "from": "gpt", "value": "The answer is 5 * tan(20°)" } ], "clip_score": 27.734375, "loss": 2.2913925647735596 }, { "id": "identity_334887", "image": "A-OKVQA/images/500900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of work were the people in yellow vests recently doing?\nChoices:\n(A) erecting bridge\n(B) concrete work\n(C) cone manufacture\n(D) arresting students" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.125, "loss": 4.5639729499816895 }, { "id": "identity_285979", "image": "A-OKVQA/images/515660.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the yellow substance used?\nChoices:\n(A) sweetening beverage\n(B) cleaning plate\n(C) cleaning hands\n(D) dipping sauce" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.1875, "loss": 4.3844146728515625 }, { "id": "identity_76118", "image": "AI2D/images/1324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If all plants die, which organism would be affected first?" }, { "from": "gpt", "value": "The answer is Primary consumers (rabbits, squirrels, mice, seed-eating birds, and herbivorous insects)" } ], "clip_score": 28.421875, "loss": 1.0521857738494873 }, { "id": "identity_29664", "image": "TabMWP/images/9784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A mail carrier counted how many pieces of mail she delivered each day. How many more pieces of mail did the mail carrier deliver on Saturday than on Wednesday?" }, { "from": "gpt", "value": "The answer is 102" } ], "clip_score": 37.1875, "loss": 2.617128372192383 }, { "id": "identity_189677", "image": "VQA2.0/images/551936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shoes the person playing with the ball is wearing?" }, { "from": "gpt", "value": "The answer is Pink and white" } ], "clip_score": 17.203125, "loss": 2.8090829849243164 }, { "id": "identity_220152", "image": "Super-CLEVR/images/superCLEVR_new_001230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the windows on the bus?" }, { "from": "gpt", "value": "The answer is Rectangular" } ], "clip_score": 16.578125, "loss": 2.8762922286987305 }, { "id": "identity_226251", "image": "PMC-VQA/images/PMC7402021_mrm28303-fig-0004_226287.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the effect on the MRI signal intensity if the magnetic field is changed from perpendicular to parallel with respect to the head, assuming all other conditions remain constant?" }, { "from": "gpt", "value": "The answer is The MRI signal intensity decreases" } ], "clip_score": 27.53125, "loss": 2.660010814666748 }, { "id": "identity_315143", "image": "PMC-VQA/images/PMC4058497_fig1_298383.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which artery is surrounded by the inflamed tissue mentioned in the image caption?\nChoices:\n(A) External carotid artery\n(B) Basilar artery\n(C) Cerebral artery\n(D) Intracavernous branches of the internal carotid artery" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.609375, "loss": 3.5690054893493652 }, { "id": "identity_309751", "image": "TQA/images/question_images/parts_ocean_floor_7240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many features are present on the ocean floor?\nChoices:\n(A) 9\n(B) 10\n(C) 8\n(D) 7" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.09375, "loss": 3.310033082962036 }, { "id": "identity_22115", "image": "AI2D/images/3849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Show the epidermal cell in the left diagram.\nChoices:\n(A) Label H\n(B) Label F\n(C) Label G\n(D) Label B" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.203125, "loss": 3.558793544769287 }, { "id": "identity_170994", "image": "TabMWP/images/7457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Felipe buys a science book and a textbook, how much change will he get from $50?" }, { "from": "gpt", "value": "The answer is $7.62" } ], "clip_score": 27.4375, "loss": 2.2881650924682617 }, { "id": "identity_98121", "image": "TabMWP/images/21624.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total revenue from Cheese pizza sales for all restaurants combined?" }, { "from": "gpt", "value": "The answer is $37" } ], "clip_score": 34.40625, "loss": 2.4464564323425293 }, { "id": "identity_146215", "image": "DocVQA/images/nrmg0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the remark for Room 329?" }, { "from": "gpt", "value": "The answer is Knoll" } ], "clip_score": 22.765625, "loss": 4.901393413543701 }, { "id": "identity_139007", "image": "PlotQA/images/21584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did the percentage of internet users exceed 2% for the first time?" }, { "from": "gpt", "value": "The answer is 2002" } ], "clip_score": 21.578125, "loss": 1.6817574501037598 }, { "id": "identity_264699", "image": "VQA-AS/images/14631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the boy engaged in?" }, { "from": "gpt", "value": "The answer is jumping rope" } ], "clip_score": 29.03125, "loss": 2.356065273284912 }, { "id": "identity_116035", "image": "VQA-RAD/images/synpic26158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible abnormality in the spleen?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.921875, "loss": 3.019479751586914 }, { "id": "identity_5421", "image": "TQA/images/question_images/parts_microscope_7199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which part of the microscope do we put a glass slide with specimen?\nChoices:\n(A) Mechanical Stage Control\n(B) Base\n(C) Objectives\n(D) Stand" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.6875, "loss": 3.873915910720825 }, { "id": "identity_278723", "image": "PMC-VQA/images/PMC8037832_Fig1_414845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of cancer is depicted in the computed tomography images among the following options: Prostate cancer, Breast cancer, Lung cancer, or Colon cancer?\nChoices:\n(A) Prostate cancer \n(B) Breast cancer \n(C) Lung cancer \n(D) Colon cancer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.5625, "loss": 3.775446653366089 }, { "id": "identity_71939", "image": "AI2D/images/4840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which foot type has the second toe longer than the big toe?" }, { "from": "gpt", "value": "The answer is Greek" } ], "clip_score": 35.59375, "loss": 3.2531142234802246 }, { "id": "identity_69489", "image": "AI2D/images/3310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled \"Golgi Apparatus\"?" }, { "from": "gpt", "value": "The answer is Modifying, sorting, and packaging proteins and lipids" } ], "clip_score": 21.546875, "loss": 1.6256494522094727 }, { "id": "identity_262745", "image": "Geometry3K/images/626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle JPL, point S is the incenter. What is the measure of angle SJP?\nChoices:\n(A) 24.5\n(B) 37.5\n(C) 56\n(D) 75" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.4375, "loss": 3.5715365409851074 }, { "id": "identity_135123", "image": "MapQA/images/map_8415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the percentage of goals for the state of Texas?" }, { "from": "gpt", "value": "The answer is 73.3%-84.2%" } ], "clip_score": 34.53125, "loss": 2.053900957107544 }, { "id": "identity_90727", "image": "Geometry3K/images/563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment WF?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 29.828125, "loss": 2.531550168991089 }, { "id": "identity_188442", "image": "VizWiz/images/VizWiz_train_00003668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the storage temperature recommended for this product?" }, { "from": "gpt", "value": "The answer is Below -18°C" } ], "clip_score": 17.03125, "loss": 2.7273459434509277 }, { "id": "identity_38995", "image": "FigureQA/images/56871.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 33.09375, "loss": 3.776400089263916 }, { "id": "identity_49477", "image": "FigureQA/images/72916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis value does Olive reach its peak?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 23.6875, "loss": 2.8555641174316406 }, { "id": "identity_150714", "image": "ScienceQA/images/5030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the formal charge on the central atom?" }, { "from": "gpt", "value": "The answer is 0 (zero)" } ], "clip_score": 26.015625, "loss": 1.941008448600769 }, { "id": "identity_83096", "image": "UniGeo/images/980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O, AB = AC, and ∠BAC = 45°, what is the length of segment CD?" }, { "from": "gpt", "value": "The answer is AB * sin(22.5°)" } ], "clip_score": 36.40625, "loss": 2.5391781330108643 }, { "id": "identity_268889", "image": "DocVQA/images/rlyv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is going to be distributed?" }, { "from": "gpt", "value": "The answer is CIGARS" } ], "clip_score": 14.8046875, "loss": 3.6699492931365967 }, { "id": "identity_156633", "image": "AI2D/images/809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role do bacteria and dead organic matter play in this ecosystem?" }, { "from": "gpt", "value": "The answer is Decomposers" } ], "clip_score": 27.890625, "loss": 4.144683837890625 }, { "id": "identity_107293", "image": "TextVQA/images/00ebf08a1a6e045c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players from the team in green are visible on the court?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.9375, "loss": 4.728398323059082 }, { "id": "identity_294225", "image": "IconQA/images/65045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of flowers present?" }, { "from": "gpt", "value": "The answer is 73" } ], "clip_score": 23.4375, "loss": 2.3340322971343994 }, { "id": "identity_275672", "image": "VQA2.0/images/434894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bowls?" }, { "from": "gpt", "value": "The answer is orange" } ], "clip_score": 19.921875, "loss": 4.127554893493652 }, { "id": "identity_259615", "image": "DVQA/images/bar_train_00051339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithms presented in the chart?" }, { "from": "gpt", "value": "The answer is 36.67" } ], "clip_score": 35.71875, "loss": 2.3004279136657715 }, { "id": "identity_16871", "image": "Super-CLEVR/images/superCLEVR_new_000053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the tiny mountain bike?" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 25.328125, "loss": 3.144543409347534 }, { "id": "identity_161222", "image": "GeoQA+/images/2703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is parallel to CD and angle DCE is 131 degrees, what is the measure of angle DEC?" }, { "from": "gpt", "value": "The answer is 49 degrees" } ], "clip_score": 23.96875, "loss": 1.6530420780181885 }, { "id": "identity_264563", "image": "ScienceQA/images/5267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which colony is depicted in the image shown? \nOptions:\n(A) New Hampshire\n(B) North Carolina\n(C) Vermont\n(D) Connecticut" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.515625, "loss": 4.2543110847473145 }, { "id": "identity_25301", "image": "IconQA/images/104009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Move the ruler to measure the length of the line to the nearest centimeter. The line is about (_) centimeters long." }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.53125, "loss": 2.4139435291290283 }, { "id": "identity_294343", "image": "A-OKVQA/images/274541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which electronic devices are they utilizing? Options include cell phone, tablet, desktop computer, and laptop computer.\nChoices:\n(A) cell phone\n(B) tablet\n(C) desktop computer\n(D) laptop computer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.515625, "loss": 4.348315238952637 }, { "id": "identity_33712", "image": "PMC-VQA/images/PMC7921722_F1_380739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is defined in the vertical boxes?\nChoices:\n(A) DNA structure \n(B) Combinations of sub-divisions \n(C) Mathematical formulas \n(D) Genealogy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.34375, "loss": 4.314586162567139 }, { "id": "identity_119643", "image": "Super-CLEVR/images/superCLEVR_new_001121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 24.21875, "loss": 3.1229546070098877 }, { "id": "identity_115914", "image": "VQA-RAD/images/synpic31955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any abnormal opacity in the left lung field?" }, { "from": "gpt", "value": "The answer is No abnormal opacity in the left lung field" } ], "clip_score": 30.953125, "loss": 2.066908121109009 }, { "id": "identity_259329", "image": "TQA/images/abc_question_images/optics_reflection_19180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the angles represented by letters \"T\" and \"U\"?" }, { "from": "gpt", "value": "The answer is 2θ" } ], "clip_score": 23.453125, "loss": 2.9830403327941895 }, { "id": "identity_255248", "image": "PMC-VQA/images/PMC5547522_Fig2_200908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is undergoing duct expansion as indicated by the black arrow in the given image, and what might be the clinical significance of this finding?" }, { "from": "gpt", "value": "The answer is A glandular duct, possibly indicating obstruction or inflammation" } ], "clip_score": 32.25, "loss": 2.8634684085845947 }, { "id": "identity_270765", "image": "TextVQA/images/1c18369be23ddf63.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all of you fans of the Red Sox?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 27.140625, "loss": 4.602072715759277 }, { "id": "identity_209133", "image": "A-OKVQA/images/144618.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the ramp the skateboarder is using?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 27.859375, "loss": 3.653928756713867 }, { "id": "identity_239829", "image": "VQA2.0/images/298182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many spokes are visible on the front wheel of the yellow bike?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 25.953125, "loss": 1.881015658378601 }, { "id": "identity_117720", "image": "Super-CLEVR/images/superCLEVR_new_000266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the bus?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.984375, "loss": 2.0123512744903564 }, { "id": "identity_255779", "image": "ScienceQA/images/13956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the top magnet is flipped upside down, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is repel" } ], "clip_score": 25.640625, "loss": 2.2706058025360107 }, { "id": "identity_207434", "image": "A-OKVQA/images/54039.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Overcast with recent rain" } ], "clip_score": 18.34375, "loss": 3.9761950969696045 }, { "id": "identity_171408", "image": "IconQA/images/98246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What transformation has been applied to the letter 'J' to obtain the resulting figure?" }, { "from": "gpt", "value": "The answer is The letter 'J' has been rotated and reflected" } ], "clip_score": 26.71875, "loss": 2.3155531883239746 }, { "id": "identity_128587", "image": "TextVQA/images/2a5578187a344221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the exercise ball in the image?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 17.53125, "loss": 2.751398801803589 }, { "id": "identity_100054", "image": "TabMWP/images/28907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fewer apples were picked on Monday compared to Friday?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 35.96875, "loss": 2.014333724975586 }, { "id": "identity_72833", "image": "ScienceQA/images/5687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 26.203125, "loss": 2.8921265602111816 }, { "id": "identity_57926", "image": "DocVQA/images/fskw0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount due for the invoice?" }, { "from": "gpt", "value": "The answer is 1985.00" } ], "clip_score": 25.3125, "loss": 1.7033237218856812 }, { "id": "identity_29453", "image": "DVQA/images/bar_train_00031104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people prefer the most preferred object?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.890625, "loss": 2.1943914890289307 }, { "id": "identity_104729", "image": "VQA-AS/images/12955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the curtains?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 18.203125, "loss": 3.9473514556884766 }, { "id": "identity_167455", "image": "TabMWP/images/31172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people ate macaroni and cheese less than 4 times last month?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 35.53125, "loss": 2.670064687728882 }, { "id": "identity_115540", "image": "VQA-AS/images/13069.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the picnic blanket?" }, { "from": "gpt", "value": "The answer is Red and white checkered" } ], "clip_score": 26.40625, "loss": 2.672672986984253 }, { "id": "identity_196020", "image": "PMC-VQA/images/PMC6206376_Fig5_387414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the abnormality solid or cystic in nature based on the CT image?" }, { "from": "gpt", "value": "The answer is Solid or Cystic" } ], "clip_score": 32.03125, "loss": 2.782133102416992 }, { "id": "identity_198837", "image": "TextVQA/images/9883827b0efb232b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the venue where the event is taking place?" }, { "from": "gpt", "value": "The answer is Joy Club" } ], "clip_score": 23.171875, "loss": 4.097489833831787 }, { "id": "identity_209939", "image": "VQA2.0/images/134849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of food is predominantly featured in the image?" }, { "from": "gpt", "value": "The answer is Pizza" } ], "clip_score": 22.109375, "loss": 3.2180521488189697 }, { "id": "identity_69517", "image": "TQA/images/abc_question_images/cycle_carbon_15009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the absorption of CO2 by the ocean?" }, { "from": "gpt", "value": "The answer is V" } ], "clip_score": 31.28125, "loss": 2.733555793762207 }, { "id": "identity_225766", "image": "VQA2.0/images/318598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different types of kites visible in the image, and how many of each type are there?" }, { "from": "gpt", "value": "The answer is 4 rainbow-colored kites, 1 airplane-shaped kite" } ], "clip_score": 29.484375, "loss": 1.805766224861145 }, { "id": "identity_3063", "image": "VQA-AS/images/14232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the moon out?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.078125, "loss": 3.4717166423797607 }, { "id": "identity_310711", "image": "AI2D/images/1928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A food web is depicted here. Identify the incorrect pairing of organism and role.\nChoices:\n(A) Mallard-primary consumer\n(B) Coyote-tertiary consumer\n(C) Grasshopper-primary consumer\n(D) Reed canary grass-consumer" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.125, "loss": 3.0726237297058105 }, { "id": "identity_100689", "image": "TabMWP/images/6907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of bunches of bananas in 7 crates?" }, { "from": "gpt", "value": "The answer is 35 bunches of bananas" } ], "clip_score": 27.875, "loss": 1.2006186246871948 }, { "id": "identity_27843", "image": "PMC-VQA/images/PMC5136402_fig1_102061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tumor is present in the CT-scan images? \nChoices:\n(A) Osteosarcoma \n(B) Chondrosarcoma \n(C) Liposarcoma \n(D) Leimyosarcoma " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.4375, "loss": 3.862565517425537 }, { "id": "identity_335095", "image": "AI2D/images/2734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During which phase is the moon fully visible?\nChoices:\n(A) waxing moon\n(B) new moon\n(C) waning moon\n(D) full moon" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.9375, "loss": 3.6934943199157715 }, { "id": "identity_232478", "image": "GeoQA+/images/1064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,AB、AC的垂直平分线分别交BC于点E、F,若∠BAC=102°,且BE=EC,BF=FC,求∠EAF的值,并证明E、F、A三点共线。" }, { "from": "gpt", "value": "The answer is 51°" } ], "clip_score": 31.59375, "loss": 2.8865346908569336 }, { "id": "identity_10306", "image": "VQA2.0/images/96969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this in the U.S.?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 14.4609375, "loss": 3.206991672515869 }, { "id": "identity_242125", "image": "GeoQA+/images/5453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,∠B=60°,∠A=80°.延长BC至点D,若点E在AC上且∠BEC=90°,则∠EBC的大小为多少度?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 31.734375, "loss": 2.0238654613494873 }, { "id": "identity_106736", "image": "Super-CLEVR/images/superCLEVR_new_001158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the largest object in the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 16.71875, "loss": 4.369453430175781 }, { "id": "identity_1442", "image": "TabMWP/images/35154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each year, the campers at Jackie's summer camp try to set that summer's record for finishing the obstacle course as quickly as possible. According to the table, what was the rate of change between 2017 and 2018?" }, { "from": "gpt", "value": "The answer is -5" } ], "clip_score": 33.71875, "loss": 3.609231948852539 }, { "id": "identity_203388", "image": "A-OKVQA/images/564830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many action figures can be seen on top of the TV?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.6875, "loss": 2.491424322128296 }, { "id": "identity_119194", "image": "TextVQA/images/195c0c8a689ff595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sport is likely being watched by the crowd?" }, { "from": "gpt", "value": "The answer is Football (Soccer)" } ], "clip_score": 24.328125, "loss": 2.1677606105804443 }, { "id": "identity_47510", "image": "FigureQA/images/15409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Gray section smaller than the Tomato section?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.875, "loss": 3.3867461681365967 }, { "id": "identity_251479", "image": "MapQA/images/map_8887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the percentages in states that border California?" }, { "from": "gpt", "value": "The answer is 16.03%" } ], "clip_score": 32.46875, "loss": 1.6386280059814453 }, { "id": "identity_200237", "image": "PMC-VQA/images/PMC3621952_pone-0060776-g002_197794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the possible function of the cells shown in the image?" }, { "from": "gpt", "value": "The answer is Active processes like division or gene expression" } ], "clip_score": 24.890625, "loss": 5.097567558288574 }, { "id": "identity_26454", "image": "A-OKVQA/images/83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are people doing here?\nChoices:\n(A) dancing\n(B) jogging\n(C) singing\n(D) buying food" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 17.921875, "loss": 4.053090572357178 }, { "id": "identity_227984", "image": "ScienceQA/images/11651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the ratio of the magnetic forces between the magnets in Pair 1 and Pair 2, given that the magnetic force between two magnets is inversely proportional to the square of the distance between them" }, { "from": "gpt", "value": "The answer is approximately 5.71" } ], "clip_score": 16.40625, "loss": 2.7366209030151367 }, { "id": "identity_176950", "image": "CLEVR-Math/images/CLEVR_train_022377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cylinders?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.921875, "loss": 2.5682058334350586 }, { "id": "identity_73335", "image": "TQA/images/abc_question_images/nuclear_energy_18113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final state of the cluster as indicated by the letter T in the diagram?" }, { "from": "gpt", "value": "The answer is A stable and compact cluster with no energy emission" } ], "clip_score": 28.9375, "loss": 4.138361930847168 }, { "id": "identity_314217", "image": "GeoQA+/images/1454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在⊙O中,AB=BC=CD,∠AOB=40°,则∠CAD的度数是多少?\nChoices:\n(A) 10°\n(B) 20°\n(C) 30°\n(D) 40°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.3125, "loss": 2.4347591400146484 }, { "id": "identity_157272", "image": "ScienceQA/images/1329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state highlighted in green shares the longest border with Canada?" }, { "from": "gpt", "value": "The answer is Maine" } ], "clip_score": 27.984375, "loss": 2.5303549766540527 }, { "id": "identity_88206", "image": "GeoQA+/images/889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠ADG = 20°, what is the measure of ∠BFG in the folded rectangle?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 36.0625, "loss": 1.6553187370300293 }, { "id": "identity_304459", "image": "TQA/images/question_images/human_system_ear_2892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many components are in the hearing system?\nChoices:\n(A) 7\n(B) 12\n(C) 10\n(D) 13" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.875, "loss": 3.056736707687378 }, { "id": "identity_269467", "image": "IconQA/images/46696.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangles present?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 27.734375, "loss": 1.8767045736312866 }, { "id": "identity_36885", "image": "PMC-VQA/images/PMC7806997_Fig2_342619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of view is shown in the segmentation images?\nChoices:\n(A) Coronal \n(B) Sagittal \n(C) Axial \n(D) Oblique " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.953125, "loss": 4.006957054138184 }, { "id": "identity_157783", "image": "TQA/images/question_images/earth_moon_phases_6003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the phase where more than half but less than all of the moon's surface is illuminated and it appears to be shrinking?" }, { "from": "gpt", "value": "The answer is Waning Gibbous" } ], "clip_score": 31.578125, "loss": 1.599068284034729 }, { "id": "identity_6803", "image": "PMC-VQA/images/PMC7697381_Fig3_308553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging technique was used 6 months after the surgery? \nChoices:\n(A) CT scans \n(B) MRI scans \n(C) Anteroposterior and lateral plain radiographs \n(D) Flexion and extension radiographs " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.921875, "loss": 3.7857346534729004 }, { "id": "identity_134867", "image": "DVQA/images/bar_train_00165888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of people preferring baby less than half of those preferring faith?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.28125, "loss": 3.188246488571167 }, { "id": "identity_44839", "image": "MapQA/images/map_1471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of percentage hits for the state of Texas?" }, { "from": "gpt", "value": "The answer is 62.5%-71.8%" } ], "clip_score": 32.375, "loss": 1.9029991626739502 }, { "id": "identity_187129", "image": "A-OKVQA/images/126096.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is More than twenty people" } ], "clip_score": 15.203125, "loss": 3.214945077896118 }, { "id": "identity_297567", "image": "TabMWP/images/6607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many parking lots in the city had a car count between 42 and 55 cars, according to the director of transportation's study on public parking?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.015625, "loss": 2.5628840923309326 }, { "id": "identity_130390", "image": "A-OKVQA/images/422157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is parked behind the bus?" }, { "from": "gpt", "value": "The answer is Car" } ], "clip_score": 20.3125, "loss": 3.6581223011016846 }, { "id": "identity_166287", "image": "TabMWP/images/34163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total sales amount for Fizzy Fun over the weekend?" }, { "from": "gpt", "value": "The answer is $27" } ], "clip_score": 27.109375, "loss": 2.6296207904815674 }, { "id": "identity_50447", "image": "MapQA/images/map_7767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of payments for the state of Texas?" }, { "from": "gpt", "value": "The answer is 629-629" } ], "clip_score": 32.625, "loss": 2.280444622039795 }, { "id": "identity_74042", "image": "ScienceQA/images/7476.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the meaning of \"belligerent\" as per the image?" }, { "from": "gpt", "value": "The answer is Hostile or argumentative" } ], "clip_score": 27.9375, "loss": 2.182748794555664 }, { "id": "identity_337643", "image": "TextVQA/images/5eb42a802c355fe8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of books are in this section?" }, { "from": "gpt", "value": "The answer is fiction" } ], "clip_score": 20.859375, "loss": 3.4802322387695312 }, { "id": "identity_196967", "image": "PMC-VQA/images/PMC8202118_f1_467793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Coronal" } ], "clip_score": 28.171875, "loss": 4.01563024520874 }, { "id": "identity_98403", "image": "TabMWP/images/19579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of all x and y values?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 21.296875, "loss": 2.085766553878784 }, { "id": "identity_100144", "image": "TabMWP/images/19343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of push-ups done?" }, { "from": "gpt", "value": "The answer is 47" } ], "clip_score": 22.25, "loss": 2.674957275390625 }, { "id": "identity_60634", "image": "FigureQA/images/9753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the category 'Light Slate' more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.71875, "loss": 3.354234218597412 }, { "id": "identity_324058", "image": "VQA2.0/images/67749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the vehicle's headlights on?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.8828125, "loss": 3.875136613845825 }, { "id": "identity_214561", "image": "PMC-VQA/images/PMC7572012_F1_278008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the mass solid or cystic in nature?" }, { "from": "gpt", "value": "The answer is Solid" } ], "clip_score": 19.9375, "loss": 2.9598991870880127 }, { "id": "identity_145066", "image": "DocVQA/images/pmgl0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the street address of the temporary offices where the meeting will be held?" }, { "from": "gpt", "value": "The answer is 120 Wall Street, New York, N.Y" } ], "clip_score": 21.609375, "loss": 1.3023244142532349 }, { "id": "identity_227954", "image": "GeoQA+/images/552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given △ABC with AB=6, AC=5, D is the midpoint of AB, E is a point on AC such that ∠ADE=∠C, and the angle bisector of ∠BAC intersects DE and BC at points F and G respectively. If the ratio \\(\\frac{AF}{AG}\\) is known to be \\(\\frac{3}{5}\\), find the length of segment AG" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 34.15625, "loss": 2.48134708404541 }, { "id": "identity_141048", "image": "DocVQA/images/jsgn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the initials of the person who wrote the exhibit note on the top right corner?" }, { "from": "gpt", "value": "The answer is CCR" } ], "clip_score": 22.4375, "loss": 3.7904083728790283 }, { "id": "identity_137073", "image": "ChartQA/images/multi_col_80653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country has the highest percentage of Independent telecom tower sites?" }, { "from": "gpt", "value": "The answer is Italy" } ], "clip_score": 27.28125, "loss": 3.2864232063293457 }, { "id": "identity_207267", "image": "VQA2.0/images/169907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many computer monitors are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.53125, "loss": 2.383720874786377 }, { "id": "identity_301211", "image": "VQA-RAD/images/synpic25105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there air in this?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 15.2578125, "loss": 3.3843719959259033 }, { "id": "identity_135630", "image": "DVQA/images/bar_train_00092201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for 'author'?" }, { "from": "gpt", "value": "The answer is Debate" } ], "clip_score": 26.125, "loss": 3.4169352054595947 }, { "id": "identity_97809", "image": "IconQA/images/7194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the time is 10:00, how many hours until the clock shows 2:00?" }, { "from": "gpt", "value": "The answer is 4 hours" } ], "clip_score": 30.328125, "loss": 2.103696584701538 }, { "id": "identity_222185", "image": "TextVQA/images/0108c3e0b84eb653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 21.078125, "loss": 3.686760425567627 }, { "id": "identity_54896", "image": "FigureQA/images/61719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum value reached by the Gold line?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 28.828125, "loss": 2.7161877155303955 }, { "id": "identity_67493", "image": "AI2D/images/518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of voles decreases, what would be the immediate effect on the Elk population?" }, { "from": "gpt", "value": "The answer is The Elk population may increase" } ], "clip_score": 31.703125, "loss": 3.1043713092803955 }, { "id": "identity_71711", "image": "TQA/images/abc_question_images/parts_microscope_17194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'S' refer to on this microscope?" }, { "from": "gpt", "value": "The answer is Stage" } ], "clip_score": 36.59375, "loss": 3.060184955596924 }, { "id": "identity_61566", "image": "ChartQA/images/multi_col_843.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years did Agriculture have a GDP percentage greater than 27%?" }, { "from": "gpt", "value": "The answer is 7 years" } ], "clip_score": 25.40625, "loss": 2.2470481395721436 }, { "id": "identity_148728", "image": "DocVQA/images/rnjf0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number of this document?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.625, "loss": 2.2628190517425537 }, { "id": "identity_97714", "image": "TabMWP/images/33737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who spelled the least words correctly in the spelling bee?" }, { "from": "gpt", "value": "The answer is Tanvi" } ], "clip_score": 34.90625, "loss": 2.4888651371002197 }, { "id": "identity_153348", "image": "ScienceQA/images/15081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state borders the western side of Texas?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 27.125, "loss": 2.297712564468384 }, { "id": "identity_36870", "image": "DocVQA/images/kzbn0226_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What should EPA exclude from the Scopes of TSCA Risk Evaluations?" }, { "from": "gpt", "value": "The answer is Exclude Low Exposure Conditions of Use" } ], "clip_score": 28.84375, "loss": 2.5612967014312744 }, { "id": "identity_264022", "image": "TextVQA/images/3d61184009535bb5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: For what age group is this toy intended?" }, { "from": "gpt", "value": "The answer is 6+ mos" } ], "clip_score": 26.8125, "loss": 2.9911227226257324 }, { "id": "identity_306078", "image": "PMC-VQA/images/PMC8539564_jpm-11-01041-f001_93437.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the most likely diagnosis based on these findings?\nChoices:\n(A) Metastatic cancer\n(B) Hemangioma\n(C) Hepatocellular carcinoma\n(D) Fibrolamellar carcinoma" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.09375, "loss": 4.74868106842041 }, { "id": "identity_170629", "image": "TabMWP/images/23493.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of peanuts per bag of trail mix?" }, { "from": "gpt", "value": "The answer is 58" } ], "clip_score": 26.90625, "loss": 2.8214762210845947 }, { "id": "identity_328749", "image": "VizWiz/images/VizWiz_train_00019552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this look moldy at all?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.296875, "loss": 3.016096591949463 }, { "id": "identity_151421", "image": "ScienceQA/images/12017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing is depicted in the image?" }, { "from": "gpt", "value": "The answer is Leisure suit" } ], "clip_score": 25.8125, "loss": 2.1689817905426025 }, { "id": "identity_23464", "image": "TQA/images/question_images/human_system_digestive_3680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What connects the mouth to the stomach?\nChoices:\n(A) pharynx\n(B) esophagus\n(C) tongue\n(D) liver" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.140625, "loss": 4.353404521942139 }, { "id": "identity_97638", "image": "IconQA/images/73154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more orange circles on the left side or the right side of the image?" }, { "from": "gpt", "value": "The answer is Equal on both sides" } ], "clip_score": 37.75, "loss": 2.9207427501678467 }, { "id": "identity_90177", "image": "UniGeo/images/433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠CBD = 2∠BDC and ∠BAC = 44° in circle A, find the measure of ∠ABC" }, { "from": "gpt", "value": "The answer is 158°" } ], "clip_score": 36.0, "loss": 2.1036529541015625 }, { "id": "identity_321695", "image": "VQA2.0/images/405695.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this photo taken at a place where items are sold?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.28125, "loss": 3.5213825702667236 }, { "id": "identity_104649", "image": "A-OKVQA/images/353231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition depicted in the image?" }, { "from": "gpt", "value": "The answer is Overcast or cloudy" } ], "clip_score": 16.140625, "loss": 2.590339183807373 }, { "id": "identity_114451", "image": "VizWiz/images/VizWiz_train_00010445.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the shaker's body?" }, { "from": "gpt", "value": "The answer is Cylindrical" } ], "clip_score": 27.109375, "loss": 2.5153191089630127 }, { "id": "identity_46526", "image": "DocVQA/images/pkxm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of the two organizations mentioned at the top of the page?" }, { "from": "gpt", "value": "The answer is Swedish Nutrition Foundation and Swedish Medical Research Council" } ], "clip_score": 23.09375, "loss": 1.36576509475708 }, { "id": "identity_327322", "image": "PMC-VQA/images/PMC4210452_Fig1_330612.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of MRI image is shown here?\nChoices:\n(A) Sagittal\n(B) Axial\n(C) Coronal\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.421875, "loss": 4.453988075256348 }, { "id": "identity_178445", "image": "CLEVR-Math/images/CLEVR_train_007892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove the purple sphere, how many objects remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.40625, "loss": 2.1074447631835938 }, { "id": "identity_90299", "image": "Geometry3K/images/844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the line segment y?" }, { "from": "gpt", "value": "The answer is 6√3" } ], "clip_score": 29.9375, "loss": 2.7732925415039062 }, { "id": "identity_174832", "image": "CLEVR-Math/images/CLEVR_train_014822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple objects are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.203125, "loss": 2.43554425239563 }, { "id": "identity_283207", "image": "A-OKVQA/images/144018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which material are these boats constructed?\nChoices:\n(A) oak wood\n(B) bamboo\n(C) eucalyptus\n(D) birch wood" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.28125, "loss": 4.182916641235352 }, { "id": "identity_106304", "image": "VQA2.0/images/221691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats in the image?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 20.28125, "loss": 2.2924633026123047 }, { "id": "identity_168997", "image": "TabMWP/images/9507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the slope of the line if these points were plotted on a graph?" }, { "from": "gpt", "value": "The answer is 8.5" } ], "clip_score": 24.359375, "loss": 2.5954787731170654 }, { "id": "identity_198716", "image": "PMC-VQA/images/PMC7410867_Fig2_229887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is the white arrow pointing to?" }, { "from": "gpt", "value": "The answer is Nasal cavity" } ], "clip_score": 29.9375, "loss": 2.831514596939087 }, { "id": "identity_270666", "image": "ScienceQA/images/17953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Oklahoma?\n(A) Oklahoma City\n(B) Tallahassee\n(C) Jefferson City\n(D) Atlanta" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.25, "loss": 4.175487995147705 }, { "id": "identity_190474", "image": "TextVQA/images/65d28c3dea03ea1d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event is being advertised on the red banner at the bottom of the image?" }, { "from": "gpt", "value": "The answer is European Qualifiers" } ], "clip_score": 23.125, "loss": 3.301032781600952 }, { "id": "identity_206560", "image": "PMC-VQA/images/PMC5999462_F1_328205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of sinusitis in this image?" }, { "from": "gpt", "value": "The answer is Yes, evidence of sinusitis is present" } ], "clip_score": 30.453125, "loss": 2.274611473083496 }, { "id": "identity_142994", "image": "DVQA/images/bar_train_00024517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the 'golf' object across all categories?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 29.21875, "loss": 2.5204684734344482 }, { "id": "identity_250264", "image": "DVQA/images/bar_train_00065883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the knife and the watch combined across all categories?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 25.140625, "loss": 2.5210139751434326 }, { "id": "identity_97164", "image": "IconQA/images/103963.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the nail including the head to the nearest half inch?" }, { "from": "gpt", "value": "The answer is 2.5 inches" } ], "clip_score": 35.96875, "loss": 1.546773910522461 }, { "id": "identity_231855", "image": "TQA/images/abc_question_images/nuclear_energy_18122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two particles are involved in the fusion process, and what is the resulting particle?" }, { "from": "gpt", "value": "The answer is T and E, resulting in W" } ], "clip_score": 31.34375, "loss": 2.9155189990997314 }, { "id": "identity_12921", "image": "VQA2.0/images/205116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the white hair barber young?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 28.703125, "loss": 3.817997694015503 }, { "id": "identity_206367", "image": "VQA2.0/images/221674.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Afternoon" } ], "clip_score": 15.1328125, "loss": 2.6407525539398193 }, { "id": "identity_302260", "image": "TQA/images/question_images/human_system_eye_6122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many parts of this does light pass through?\nChoices:\n(A) 2\n(B) 4\n(C) 5\n(D) 3" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.8828125, "loss": 3.751434326171875 }, { "id": "identity_242835", "image": "TQA/images/question_images/ocean_waves_7126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the height of the wave is 4 cm, what is the total vertical distance between the crest and the trough?" }, { "from": "gpt", "value": "The answer is 8 cm" } ], "clip_score": 36.5625, "loss": 1.6740771532058716 }, { "id": "identity_295559", "image": "DVQA/images/bar_train_00041295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.6875, "loss": 3.9598917961120605 }, { "id": "identity_146187", "image": "DocVQA/images/kxmg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the employee number of George R Morrison?" }, { "from": "gpt", "value": "The answer is 637676" } ], "clip_score": 23.046875, "loss": 2.5374562740325928 }, { "id": "identity_296294", "image": "DVQA/images/bar_train_00070215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of bars in each group?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 19.171875, "loss": 3.4605019092559814 }, { "id": "identity_54682", "image": "ChartQA/images/multi_col_80074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between daily access of News/magazine and Sport games or matches?" }, { "from": "gpt", "value": "The answer is 39%" } ], "clip_score": 26.765625, "loss": 2.102353096008301 }, { "id": "identity_305243", "image": "AI2D/images/2600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At which point is the tide at its peak?\nChoices:\n(A) Spring tide\n(B) Neap tide\n(C) Quarter tide\n(D) Winter tide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.03125, "loss": 4.194924354553223 }, { "id": "identity_227042", "image": "Super-CLEVR/images/superCLEVR_new_000529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are the same color as the small matte regular bus, and what are their types?" }, { "from": "gpt", "value": "The answer is Zero vehicles" } ], "clip_score": 30.4375, "loss": 5.6896491050720215 }, { "id": "identity_269048", "image": "TQA/images/question_images/hair_follicles_6987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the component that precedes the Hair shaft in the hair structure?\nChoices:\n(A) Sebaceous gland\n(B) Inner root\n(C) Arrector pili muscle\n(D) Outer root" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.15625, "loss": 4.650452136993408 }, { "id": "identity_148318", "image": "PlotQA/images/9167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did any country have a decrease in coverage percentage from one year to the next?" }, { "from": "gpt", "value": "The answer is No, all countries show an increase or consistent coverage" } ], "clip_score": 20.28125, "loss": 2.7983615398406982 }, { "id": "identity_101103", "image": "IconQA/images/68502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple triangles are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.03125, "loss": 2.2038514614105225 }, { "id": "identity_286440", "image": "TextVQA/images/5515f3d3b673ed49.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which company's logo is displayed on her jersey?" }, { "from": "gpt", "value": "The answer is uhisport" } ], "clip_score": 23.578125, "loss": 4.114529132843018 }, { "id": "identity_317545", "image": "IconQA/images/5336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show two activities Kimi did on Tuesday after lunch. Which activity happened first?\nChoices:\n(A) drew a picture\n(B) went to the park" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.671875, "loss": 3.588076114654541 }, { "id": "identity_253248", "image": "TabMWP/images/36443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of roses across all bushes that have more than 50 roses?" }, { "from": "gpt", "value": "The answer is 292" } ], "clip_score": 29.484375, "loss": 2.0784192085266113 }, { "id": "identity_125421", "image": "VQA-AS/images/591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the grass in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 24.65625, "loss": 3.0102310180664062 }, { "id": "identity_47611", "image": "DocVQA/images/tqjm0223_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the quantity of the Journal Supplement being produced?" }, { "from": "gpt", "value": "The answer is 43,000" } ], "clip_score": 19.453125, "loss": 2.176309823989868 }, { "id": "identity_35978", "image": "TabMWP/images/29302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The Ashland Soup Company recorded the number of rotten tomatoes in each barrel it received. How many barrels had at least 30 rotten tomatoes but less than 90 rotten tomatoes?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 31.296875, "loss": 2.1668388843536377 }, { "id": "identity_321329", "image": "MapQA/images/map_1215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value for this state?" }, { "from": "gpt", "value": "The answer is 66.7%-77.3%" } ], "clip_score": 21.140625, "loss": 2.156339168548584 }, { "id": "identity_163789", "image": "Geometry3K/images/490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of y if the quadrilateral is a parallelogram?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 38.0625, "loss": 2.4671711921691895 }, { "id": "identity_139849", "image": "DocVQA/images/mkln0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transmission?" }, { "from": "gpt", "value": "The answer is STD" } ], "clip_score": 22.265625, "loss": 4.128004550933838 }, { "id": "identity_292234", "image": "PMC-VQA/images/PMC4567369_f01_422971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of sequence was utilized to capture the image among the following choices: (A) Sagittal, (B) T1-weighted, (C) Axial, or (D) Fluorescence?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.546875, "loss": 3.9683144092559814 }, { "id": "identity_35536", "image": "GeoQA+/images/5801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,AD为⊙O的直径,交BC于点E,若DE=2,OE=3,则tanC•tanB=()\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.125, "loss": 2.5046675205230713 }, { "id": "identity_187347", "image": "A-OKVQA/images/166474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 11.5390625, "loss": 3.4239494800567627 }, { "id": "identity_109250", "image": "TextVQA/images/006500d2b983306e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the website advertised on the train in the image?" }, { "from": "gpt", "value": "The answer is VEGAS.com" } ], "clip_score": 19.8125, "loss": 2.2285149097442627 }, { "id": "identity_228693", "image": "ScienceQA/images/21203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and name its capital city" }, { "from": "gpt", "value": "The answer is Santo Domingo" } ], "clip_score": 28.0, "loss": 3.1166751384735107 }, { "id": "identity_93611", "image": "TabMWP/images/17121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of participants in the trivia game?" }, { "from": "gpt", "value": "The answer is 73" } ], "clip_score": 33.5625, "loss": 2.7088541984558105 }, { "id": "identity_336215", "image": "IconQA/images/97473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the spinner more likely to land on?\nChoices:\n(A) white\n(B) green" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.171875, "loss": 4.3253397941589355 }, { "id": "identity_43571", "image": "DocVQA/images/qjgn0226_21.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contract number associated with this request?" }, { "from": "gpt", "value": "The answer is 402.2" } ], "clip_score": 23.109375, "loss": 3.0064711570739746 }, { "id": "identity_100179", "image": "IconQA/images/101749.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the time on Ed's clock was 30 minutes earlier, what time would it be?" }, { "from": "gpt", "value": "The answer is 3:30" } ], "clip_score": 23.5, "loss": 1.6576323509216309 }, { "id": "identity_131150", "image": "A-OKVQA/images/67749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the woman standing next to?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 24.453125, "loss": 3.701533555984497 }, { "id": "identity_316025", "image": "A-OKVQA/images/235672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is this place using covered gazebos?\nChoices:\n(A) too icy\n(B) stay dry\n(C) too sunny\n(D) too windy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.84375, "loss": 4.193076133728027 }, { "id": "identity_142183", "image": "DVQA/images/bar_train_00005718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the chart?" }, { "from": "gpt", "value": "The answer is Most preferred objects" } ], "clip_score": 31.125, "loss": 2.7633349895477295 }, { "id": "identity_50029", "image": "DocVQA/images/lqbh0024_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days will Phase II take to complete?" }, { "from": "gpt", "value": "The answer is 1 Week" } ], "clip_score": 20.4375, "loss": 3.1033177375793457 }, { "id": "identity_161309", "image": "GeoQA+/images/2495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O, what is the measure of ∠ADB?" }, { "from": "gpt", "value": "The answer is 75°" } ], "clip_score": 35.21875, "loss": 2.2458178997039795 }, { "id": "identity_294060", "image": "DVQA/images/bar_train_00115208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which dataset is represented by the color steelblue?" }, { "from": "gpt", "value": "The answer is herd" } ], "clip_score": 26.21875, "loss": 2.4795658588409424 }, { "id": "identity_335881", "image": "A-OKVQA/images/397186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who are the two entities being judged?\nChoices:\n(A) dog dog\n(B) woman\n(C) man man\n(D) dog man" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.375, "loss": 3.93231201171875 }, { "id": "identity_289862", "image": "TabMWP/images/16075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of baskets that Cora made during basketball practice each day, based on the recorded data?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 36.71875, "loss": 2.9350087642669678 }, { "id": "identity_267908", "image": "VQA2.0/images/353485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the individuals?" }, { "from": "gpt", "value": "The answer is sidewalk" } ], "clip_score": 19.21875, "loss": 3.622344970703125 }, { "id": "identity_270291", "image": "IconQA/images/63338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the boundary of the shape?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.0, "loss": 2.2292840480804443 }, { "id": "identity_154533", "image": "ScienceQA/images/13178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique items are there in total on Elise and Terrell's want lists?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.046875, "loss": 2.242462158203125 }, { "id": "identity_337528", "image": "PMC-VQA/images/PMC5383222_pone.0174620.g002_150301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the visual map shown in the image caption?\nChoices:\n(A) Relative Apparent Contrast Map\n(B) Parametric Response Map\n(C) Apparent Diffusion Coefficient Map\n(D) Absolute Enhancement Map" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.53125, "loss": 4.260622978210449 }, { "id": "identity_171016", "image": "IconQA/images/57775.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you were to cut the line into three equal parts, how many paper clips would each part measure?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.5, "loss": 2.2789995670318604 }, { "id": "identity_94208", "image": "IconQA/images/104023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each triangle represents a piece of pie, how many pieces are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.359375, "loss": 2.3318045139312744 }, { "id": "identity_292481", "image": "PMC-VQA/images/PMC8464884_jcm-10-04057-f002_66122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the focus of measurement in CBCT scans? \nChoices:\n(A) Tooth alignment\n(B) Retromolar space\n(C) Gum thickness\n(D) Cavity depth" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.109375, "loss": 4.662387847900391 }, { "id": "identity_280299", "image": "VizWiz/images/VizWiz_train_00002564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the sign?" }, { "from": "gpt", "value": "The answer is this sign to rent bicycle" } ], "clip_score": 36.4375, "loss": 3.5118820667266846 }, { "id": "identity_109619", "image": "A-OKVQA/images/47481.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many oranges are visible in the fruit basket?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 13.8359375, "loss": 4.421996593475342 }, { "id": "identity_227855", "image": "GeoQA+/images/316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,D为直径AB的延长线上一点,DC切⊙O于点G,若∠A=35°,求∠DGC的大小。" }, { "from": "gpt", "value": "The answer is 35°" } ], "clip_score": 30.578125, "loss": 1.8456227779388428 }, { "id": "identity_332705", "image": "PMC-VQA/images/PMC7711293_F1_313327.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the black arrow in this represent?\nChoices:\n(A) Fibrosis \n(B) Necrosis \n(C) Inflammation \n(D) None of the above" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.328125, "loss": 4.321688175201416 }, { "id": "identity_241991", "image": "TabMWP/images/24229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which movie has the longest duration, and what is its duration in hours and minutes?" }, { "from": "gpt", "value": "The answer is Spaceships and Rockets or Aliens from Outer Space, 2 hours 45 minutes" } ], "clip_score": 27.875, "loss": 2.050034999847412 }, { "id": "identity_336506", "image": "TabMWP/images/14226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many turtles are in some cages if each cage has 10 turtles?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 38.9375, "loss": 1.9715549945831299 }, { "id": "identity_94738", "image": "IconQA/images/40481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of lids to cups?" }, { "from": "gpt", "value": "The answer is 1:2" } ], "clip_score": 35.5, "loss": 1.5636636018753052 }, { "id": "identity_189784", "image": "PMC-VQA/images/PMC8145021_pharmaceuticals-14-00403-f001_448672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of pleural effusion in this image?" }, { "from": "gpt", "value": "The answer is No signs of pleural effusion" } ], "clip_score": 26.796875, "loss": 2.6264989376068115 }, { "id": "identity_147428", "image": "DocVQA/images/fmbb0228_14.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the State Laboratory Director for New York State?" }, { "from": "gpt", "value": "The answer is Alan R. Hinman, M.D" } ], "clip_score": 21.5625, "loss": 2.6421942710876465 }, { "id": "identity_160895", "image": "GeoQA+/images/3658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the line segment AD?" }, { "from": "gpt", "value": "The answer is AD = 20.4" } ], "clip_score": 22.765625, "loss": 2.77024245262146 }, { "id": "identity_165557", "image": "GeoQA+/images/3707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABC in the triangle ABC?" }, { "from": "gpt", "value": "The answer is 15°" } ], "clip_score": 33.28125, "loss": 1.8793182373046875 }, { "id": "identity_186414", "image": "VQA2.0/images/474069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 15.53125, "loss": 1.9661909341812134 }, { "id": "identity_72752", "image": "ScienceQA/images/19429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly east of Oregon?" }, { "from": "gpt", "value": "The answer is Idaho" } ], "clip_score": 27.484375, "loss": 2.5284130573272705 }, { "id": "identity_197819", "image": "A-OKVQA/images/250645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible street lanes are there in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.5625, "loss": 4.4990553855896 }, { "id": "identity_125101", "image": "A-OKVQA/images/401842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the natural feature visible in the background?" }, { "from": "gpt", "value": "The answer is The sea or ocean" } ], "clip_score": 22.671875, "loss": 3.6389665603637695 }, { "id": "identity_64937", "image": "AI2D/images/327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many layers contain shell fossils?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.859375, "loss": 2.4016315937042236 }, { "id": "identity_121420", "image": "VQA-AS/images/9361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of bird is flying in the sky?" }, { "from": "gpt", "value": "The answer is Pigeon" } ], "clip_score": 21.875, "loss": 2.593183755874634 }, { "id": "identity_227540", "image": "ScienceQA/images/18967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the image, identify the type of candy shown and describe its typical texture and flavor profile" }, { "from": "gpt", "value": "The answer is Hard texture, fruity or sweet flavor" } ], "clip_score": 26.53125, "loss": 3.2663567066192627 }, { "id": "identity_10954", "image": "PMC-VQA/images/PMC2767135_F2_48950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the magnetic resonance images show? \nChoices:\n(A) Normal brain \n(B) Areas of low signal intensity \n(C) Areas of high signal intensity in the thalamic areas \n(D) Areas of high signal intensity in the frontal lobe. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.5, "loss": 5.146282196044922 }, { "id": "identity_238669", "image": "DVQA/images/bar_train_00174309.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of saline across all categories?" }, { "from": "gpt", "value": "The answer is 111,011,000" } ], "clip_score": 25.46875, "loss": 2.205716609954834 }, { "id": "identity_280384", "image": "ScienceQA/images/17518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states listed, which state is located the farthest east? (A) New Hampshire, (B) Louisiana, (C) New Jersey, or (D) North Carolina." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.625, "loss": 3.7927277088165283 }, { "id": "identity_95877", "image": "TabMWP/images/3680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a price of $355, what is the difference between the quantity supplied and the quantity demanded?" }, { "from": "gpt", "value": "The answer is The difference is 14,100" } ], "clip_score": 32.8125, "loss": 2.5598506927490234 }, { "id": "identity_245277", "image": "PlotQA/images/39475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of primary and secondary education combined for all countries where the duration of secondary education is greater than 2.4 years?" }, { "from": "gpt", "value": "The answer is 33.5 years" } ], "clip_score": 34.875, "loss": 1.95754873752594 }, { "id": "identity_247213", "image": "ScienceQA/images/17619.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the physical adaptations of a crocodile that help it survive in its environment, including its mouth, skin, and claws" }, { "from": "gpt", "value": "The answer is Pointed teeth, tough skin, sharp claws" } ], "clip_score": 28.796875, "loss": 3.0699539184570312 }, { "id": "identity_25860", "image": "TextVQA/images/0002d070329eb0fc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does it say next to the red stars?" }, { "from": "gpt", "value": "The answer is heineken" } ], "clip_score": 14.5, "loss": 2.2812578678131104 }, { "id": "identity_77718", "image": "AI2D/images/3075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many centrioles are present in this cell diagram?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.703125, "loss": 2.1020514965057373 }, { "id": "identity_214048", "image": "A-OKVQA/images/176736.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of activity are the individuals on the blue boards likely preparing for?" }, { "from": "gpt", "value": "The answer is Surfing" } ], "clip_score": 27.5, "loss": 2.418447971343994 }, { "id": "identity_96736", "image": "TabMWP/images/1633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of orange candies per bag?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 37.25, "loss": 3.2587831020355225 }, { "id": "identity_290132", "image": "Super-CLEVR/images/superCLEVR_new_001042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are either large cyan rubber objects in front of the brown airplane or brown shiny airplanes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.1875, "loss": 2.370410919189453 }, { "id": "identity_210265", "image": "PMC-VQA/images/PMC6787035_Fig1_49003.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the lesion located in the left or right hemisphere of the brain?" }, { "from": "gpt", "value": "The answer is Right hemisphere" } ], "clip_score": 28.703125, "loss": 2.799417734146118 }, { "id": "identity_44799", "image": "ChartQA/images/14726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of respondents believe that negotiations are the best way to achieve statehood?" }, { "from": "gpt", "value": "The answer is 15%" } ], "clip_score": 30.921875, "loss": 1.47710382938385 }, { "id": "identity_152268", "image": "ScienceQA/images/6128.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which peninsula is visible to the west of the highlighted country?" }, { "from": "gpt", "value": "The answer is Tiburon Peninsula" } ], "clip_score": 23.109375, "loss": 2.5325801372528076 }, { "id": "identity_103270", "image": "CLEVR-Math/images/CLEVR_train_065762.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more cubes than cylinders?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.265625, "loss": 3.0396790504455566 }, { "id": "identity_28160", "image": "ScienceQA/images/15007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bird's beak is also adapted to catch insects?\nChoices:\n(A) European nightjar\n(B) Australian pelican" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.1875, "loss": 3.464930295944214 }, { "id": "identity_59662", "image": "DVQA/images/bar_train_00080168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of accuracy values for the algorithm 'inch'?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.59375, "loss": 3.3781158924102783 }, { "id": "identity_118894", "image": "Super-CLEVR/images/superCLEVR_new_001376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the scooter closer to the golden car or the red car?" }, { "from": "gpt", "value": "The answer is Golden car" } ], "clip_score": 27.1875, "loss": 2.9374799728393555 }, { "id": "identity_236617", "image": "IconQA/images/17891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of marbles per row if there are approximately 70 marbles in total and there are 7 rows of marbles?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 23.21875, "loss": 2.047855854034424 }, { "id": "identity_29861", "image": "DVQA/images/bar_train_00143393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the smallest bar?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.09375, "loss": 2.4385368824005127 }, { "id": "identity_308002", "image": "TextVQA/images/5e8e568972c5869e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this place?" }, { "from": "gpt", "value": "The answer is airport" } ], "clip_score": 28.953125, "loss": 2.8379125595092773 }, { "id": "identity_328192", "image": "IconQA/images/78687.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use the ruler to find the length of the line to the nearest centimeter. The line is approximately (_) centimeters long." }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 33.65625, "loss": 2.2250349521636963 }, { "id": "identity_255017", "image": "DocVQA/images/gsgj0223_100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value in rupees of raw materials and stores and spare parts consumed in the current year?" }, { "from": "gpt", "value": "The answer is 58,77,92,148 rupees" } ], "clip_score": 25.5, "loss": 1.929703712463379 }, { "id": "identity_206857", "image": "Super-CLEVR/images/superCLEVR_new_001686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that appears to be metallic?" }, { "from": "gpt", "value": "The answer is Yes, the small brown chopper" } ], "clip_score": 23.765625, "loss": 4.201470375061035 }, { "id": "identity_240411", "image": "FigureQA/images/20568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the largest and smallest segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 30%" } ], "clip_score": 32.90625, "loss": 1.7142119407653809 }, { "id": "identity_110580", "image": "PMC-VQA/images/PMC8727688_f2_156113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique was used to produce these images?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT) scan" } ], "clip_score": 31.84375, "loss": 1.7301899194717407 }, { "id": "identity_33995", "image": "AI2D/images/1878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the given food web, name any two herbivores?\nChoices:\n(A) coyote, bobcat\n(B) dingo, jack rabbit\n(C) dingo, bobcat\n(D) roadrunner&jack rabbit" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.859375, "loss": 2.7560677528381348 }, { "id": "identity_118686", "image": "VQA2.0/images/205116.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the barber's shirt?" }, { "from": "gpt", "value": "The answer is Blue-striped" } ], "clip_score": 29.71875, "loss": 3.5722508430480957 }, { "id": "identity_299087", "image": "PMC-VQA/images/PMC3858590_Fig2_250830.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What radiographic finding is present in the right breast?\nChoices:\n(A) Decreased breast density\n(B) Increased breast density\n(C) Mass in the upper outer breast\n(D) Calcifications in the lower inner breast" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.3125, "loss": 4.5015363693237305 }, { "id": "identity_296834", "image": "A-OKVQA/images/48034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What activity is the boy engaged in?\nChoices:\n(A) stretching\n(B) running\n(C) descending\n(D) ascending" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 8.1484375, "loss": 4.558874607086182 }, { "id": "identity_112900", "image": "TextVQA/images/8774ea9366a4c286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the traffic light visible in the image?" }, { "from": "gpt", "value": "The answer is Square" } ], "clip_score": 26.84375, "loss": 2.9970881938934326 }, { "id": "identity_263252", "image": "VQA-RAD/images/synpic26925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the lesion result in herniation of the brainstem?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.328125, "loss": 2.8623595237731934 }, { "id": "identity_223457", "image": "VizWiz/images/VizWiz_train_00004054.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the iPod in the image?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 31.34375, "loss": 3.1587090492248535 }, { "id": "identity_15552", "image": "DVQA/images/bar_train_00051589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the values in the chart presented in a logarithmic scale?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.28125, "loss": 3.6681647300720215 }, { "id": "identity_244575", "image": "DocVQA/images/xnyc0227_84.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sample size of boys and girls combined?" }, { "from": "gpt", "value": "The answer is 5,000" } ], "clip_score": 19.375, "loss": 1.8931013345718384 }, { "id": "identity_75496", "image": "ScienceQA/images/2180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What ocean habitat is this fish commonly found in?" }, { "from": "gpt", "value": "The answer is Coral reefs" } ], "clip_score": 31.15625, "loss": 1.9631258249282837 }, { "id": "identity_118527", "image": "VQA2.0/images/103940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible wheels does the chair have?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.828125, "loss": 4.507306098937988 }, { "id": "identity_117933", "image": "A-OKVQA/images/360595.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the ground?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 18.640625, "loss": 4.793999195098877 }, { "id": "identity_75648", "image": "AI2D/images/4195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many main types of rocks are included in the rock cycle diagram?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 36.1875, "loss": 3.9255642890930176 }, { "id": "identity_73313", "image": "TQA/images/question_images/cellular_respiration_9046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many water molecules are produced at the end of the electron transport stage?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 27.421875, "loss": 1.9061311483383179 }, { "id": "identity_60155", "image": "FigureQA/images/85028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of x-axis values for Medium Periwinkle?" }, { "from": "gpt", "value": "The answer is 0 to 100" } ], "clip_score": 28.0625, "loss": 1.749353051185608 }, { "id": "identity_219114", "image": "A-OKVQA/images/418030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the people engaged in?" }, { "from": "gpt", "value": "The answer is Flying kites" } ], "clip_score": 24.515625, "loss": 2.18497896194458 }, { "id": "identity_74417", "image": "AI2D/images/3002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the absence of a structure where the second yellow arrow from the top is pointing?" }, { "from": "gpt", "value": "The answer is No bud" } ], "clip_score": 22.5, "loss": 3.5205657482147217 }, { "id": "identity_157335", "image": "AI2D/images/2314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the environment required for the Aedes Mosquito eggs to hatch?" }, { "from": "gpt", "value": "The answer is Stagnant water" } ], "clip_score": 36.9375, "loss": 2.040649175643921 }, { "id": "identity_127061", "image": "TextVQA/images/6e6b232838ea3209.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible branding on the front of the monitor?" }, { "from": "gpt", "value": "The answer is Yes, LG logo is visible" } ], "clip_score": 27.34375, "loss": 2.5644781589508057 }, { "id": "identity_55583", "image": "FigureQA/images/73303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Web Green have a consistent decrease in value?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.421875, "loss": 2.8833930492401123 }, { "id": "identity_228509", "image": "Geometry3K/images/1855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the perimeter of the parallelogram" }, { "from": "gpt", "value": "The answer is 116 mm" } ], "clip_score": 31.53125, "loss": 2.3641905784606934 }, { "id": "identity_231675", "image": "CLEVR-Math/images/CLEVR_train_063908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 1 red matte sphere and remove 2 blue matte cylinders, how many objects exist?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 29.0625, "loss": 2.372138738632202 }, { "id": "identity_29198", "image": "VQA2.0/images/8025.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are these people riding?" }, { "from": "gpt", "value": "The answer is bikes" } ], "clip_score": 29.578125, "loss": 3.1105594635009766 }, { "id": "identity_121903", "image": "VQA-AS/images/8734.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plants are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.578125, "loss": 4.241202354431152 }, { "id": "identity_137438", "image": "PlotQA/images/32689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the amount of services exported between 2008 and 2012?" }, { "from": "gpt", "value": "The answer is 10000000000" } ], "clip_score": 28.4375, "loss": 1.4267979860305786 }, { "id": "identity_208308", "image": "TextVQA/images/0d401ff381cbf336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text in the SANYO advertisement?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 32.625, "loss": 2.8365070819854736 }, { "id": "identity_297323", "image": "VQA2.0/images/511930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of people currently wearing aprons?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.234375, "loss": 2.540926218032837 }, { "id": "identity_286229", "image": "DVQA/images/bar_train_00134965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people who prefer the most favored object in the entire chart?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 29.921875, "loss": 2.588207244873047 }, { "id": "identity_310700", "image": "Geometry3K/images/33.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given shape A B C D, A B = 2 x + 3 and B C = 5 x. Find x.\nChoices:\n(A) 1\n(B) 2\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.6875, "loss": 2.614699125289917 }, { "id": "identity_85105", "image": "UniGeo/images/2074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment BD in the parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is √2" } ], "clip_score": 35.9375, "loss": 2.574073314666748 }, { "id": "identity_88446", "image": "UniGeo/images/1359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠APB = 50°, what is the measure of ∠PBC?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.125, "loss": 1.8150440454483032 }, { "id": "identity_48694", "image": "DocVQA/images/mjvg0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections are listed before \"Medical Care For Public Assistance Recipients\"?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 12.6328125, "loss": 2.430551767349243 }, { "id": "identity_66023", "image": "ScienceQA/images/830.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is located to the north of the highlighted state?" }, { "from": "gpt", "value": "The answer is Arkansas" } ], "clip_score": 31.203125, "loss": 2.8014352321624756 }, { "id": "identity_285258", "image": "DVQA/images/bar_train_00172615.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In any category, which object is the most favored?" }, { "from": "gpt", "value": "The answer is ear" } ], "clip_score": 25.546875, "loss": 3.2290008068084717 }, { "id": "identity_65593", "image": "TQA/images/abc_question_images/earth_eclipses_10684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter indicates the area where a partial eclipse is visible?" }, { "from": "gpt", "value": "The answer is U" } ], "clip_score": 24.265625, "loss": 3.57080078125 }, { "id": "identity_945", "image": "FigureQA/images/66722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Green Yellow the smoothest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.765625, "loss": 4.364634037017822 }, { "id": "identity_285075", "image": "VizWiz/images/VizWiz_train_00012329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you inform me about what this is?" }, { "from": "gpt", "value": "The answer is ibuprofen 200mg liquid softgels" } ], "clip_score": 34.375, "loss": 1.4478726387023926 }, { "id": "identity_177570", "image": "IconQA/images/78007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of marbles per row?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.15625, "loss": 2.219785690307617 }, { "id": "identity_300562", "image": "TextVQA/images/438617e13660aa6e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the bottle?" }, { "from": "gpt", "value": "The answer is elijah craig" } ], "clip_score": 36.1875, "loss": 1.998382568359375 }, { "id": "identity_76967", "image": "TQA/images/question_images/human_system_respiratory_3594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the space surrounding the lungs?" }, { "from": "gpt", "value": "The answer is Pleural cavity" } ], "clip_score": 32.90625, "loss": 2.0923616886138916 }, { "id": "identity_217736", "image": "PMC-VQA/images/PMC4174767_F6_322464.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brain images are displayed in the VBM imaging modality?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 30.53125, "loss": 2.1599931716918945 }, { "id": "identity_163596", "image": "GeoQA+/images/31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if PA = PB, PB = 4, and PD = 3, what is the ratio of the area of triangle APD to the area of triangle BPD?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 34.28125, "loss": 1.4988553524017334 }, { "id": "identity_221001", "image": "A-OKVQA/images/180824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the chalk drawings on the street?" }, { "from": "gpt", "value": "The answer is Various colors" } ], "clip_score": 16.171875, "loss": 3.377878189086914 }, { "id": "identity_207897", "image": "VQA2.0/images/480495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the kite closest to the top edge of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 24.96875, "loss": 2.8567922115325928 }, { "id": "identity_258547", "image": "A-OKVQA/images/335784.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sign that says \"no parking\" in the background above the pack of bikers, and what is the shape of the sign to the left of the bikers?" }, { "from": "gpt", "value": "The answer is Red and white, diamond" } ], "clip_score": 20.8125, "loss": 3.351534366607666 }, { "id": "identity_183658", "image": "PMC-VQA/images/PMC7031477_Fig5_121311.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the anatomical structure labeled with the letter 'b'?" }, { "from": "gpt", "value": "The answer is Mandibular angle and ramus" } ], "clip_score": 32.875, "loss": 3.73970627784729 }, { "id": "identity_334193", "image": "PMC-VQA/images/PMC6878646_Fig1_74197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging methods are used together to define the BTV in SBRT-PATHY?\nChoices:\n(A) CT and MRI\n(B) PET and CT\n(C) X-ray and ultrasound\n(D) CT and ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.78125, "loss": 3.5725274085998535 }, { "id": "identity_11415", "image": "IconQA/images/78555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use paper clips to measure the line. The line is about (_) paper clips long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.796875, "loss": 2.492297887802124 }, { "id": "identity_233648", "image": "IconQA/images/16407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vertices in all the hearts combined?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 25.046875, "loss": 1.9165860414505005 }, { "id": "identity_236669", "image": "GeoQA+/images/5375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,五边形ABCDE关于过点A的直线l轴对称,若∠DAE=40°,∠ADE=60°,求五边形ABCDE的内角和是多少度?" }, { "from": "gpt", "value": "The answer is 540°" } ], "clip_score": 24.734375, "loss": 1.9232537746429443 }, { "id": "identity_252289", "image": "VQA2.0/images/170513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indications that this event is taking place outdoors, and what might be the purpose of the gathering?" }, { "from": "gpt", "value": "The answer is Outdoor event, likely a ceremonial or official gathering" } ], "clip_score": 18.109375, "loss": 3.028048515319824 }, { "id": "identity_327150", "image": "IconQA/images/107178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are visible?\nChoices:\n(A) 1\n(B) 5\n(C) 3\n(D) 6\n(E) 9" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.796875, "loss": 3.225499391555786 }, { "id": "identity_52783", "image": "DVQA/images/bar_train_00151878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the 'thing' category in the 'shift' group?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.4375, "loss": 2.777524709701538 }, { "id": "identity_269257", "image": "ScienceQA/images/6995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the solutions, Solution A or Solution B, has a higher concentration of purple particles, or are their concentrations the same?\nChoices:\n(A) Solution A\n(B) Solution B\n(C) neither; their concentrations are the same" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.78125, "loss": 3.4571926593780518 }, { "id": "identity_192754", "image": "PMC-VQA/images/PMC9290596_papr13055-fig-0001_343188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is primarily shown in this X-ray image?" }, { "from": "gpt", "value": "The answer is Lumbar spine" } ], "clip_score": 30.625, "loss": 3.0080487728118896 }, { "id": "identity_146312", "image": "DocVQA/images/xrcy0227_32.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount approved for payment?" }, { "from": "gpt", "value": "The answer is 140.93" } ], "clip_score": 23.59375, "loss": 2.1311283111572266 }, { "id": "identity_106289", "image": "A-OKVQA/images/117683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the backpack on the left?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 25.5625, "loss": 3.5638022422790527 }, { "id": "identity_21962", "image": "MapQA/images/map_2728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the legend a continuous bar ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.8515625, "loss": 3.265002489089966 }, { "id": "identity_263328", "image": "DocVQA/images/npjh0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the paternal parent of Mr. Bunker?" }, { "from": "gpt", "value": "The answer is ELLSWORTH BUNKER" } ], "clip_score": 25.921875, "loss": 1.9445761442184448 }, { "id": "identity_296206", "image": "ScienceQA/images/5371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the material used to make this binder?\nChoices:\n(A) plastic\n(B) ceramic" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.5625, "loss": 4.122715950012207 }, { "id": "identity_166608", "image": "TabMWP/images/22634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Carly buys 5 star-shaped silver beads, how much change will she receive from a $10 bill?" }, { "from": "gpt", "value": "The answer is $7.75" } ], "clip_score": 31.21875, "loss": 2.0339550971984863 }, { "id": "identity_199003", "image": "PMC-VQA/images/PMC6052908_F2_343746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What magnification level is used in these images?" }, { "from": "gpt", "value": "The answer is x200" } ], "clip_score": 30.734375, "loss": 2.241347551345825 }, { "id": "identity_81228", "image": "GeoQA+/images/1290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle APB in the given image?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 31.515625, "loss": 2.055253505706787 }, { "id": "identity_220192", "image": "A-OKVQA/images/546378.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the shelf the cat is sitting on?" }, { "from": "gpt", "value": "The answer is Wood" } ], "clip_score": 30.671875, "loss": 3.044241189956665 }, { "id": "identity_5757", "image": "A-OKVQA/images/270193.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the silo behind the train store?\nChoices:\n(A) grain\n(B) water\n(C) coal\n(D) corn" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.03125, "loss": 4.604970455169678 }, { "id": "identity_227515", "image": "Super-CLEVR/images/superCLEVR_new_001271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects in the image that are the same color as the tiny shiny thing, including the tiny shiny thing itself?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 20.4375, "loss": 2.144571542739868 }, { "id": "identity_167841", "image": "TabMWP/images/32177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of train tickets sold from Monday to Thursday?" }, { "from": "gpt", "value": "The answer is 2557" } ], "clip_score": 37.6875, "loss": 2.3812990188598633 }, { "id": "identity_265275", "image": "TextVQA/images/0a2045afdf194803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand is imprinted on the golf ball?" }, { "from": "gpt", "value": "The answer is tiruron" } ], "clip_score": 37.53125, "loss": 4.004448413848877 }, { "id": "identity_236748", "image": "A-OKVQA/images/328810.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific words is the woman likely trying to form with the magnets on the refrigerator?" }, { "from": "gpt", "value": "The answer is The cat in the hat" } ], "clip_score": 18.6875, "loss": 3.085768222808838 }, { "id": "identity_305222", "image": "A-OKVQA/images/318702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which film's sequel might these individuals be part of the cast for?\nChoices:\n(A) american beauty\n(B) downton abbey\n(C) blue velvet\n(D) us" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.0859375, "loss": 3.682529926300049 }, { "id": "identity_126655", "image": "TextVQA/images/8c59e48619bd4871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total hits do the 'METS' have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 14.6171875, "loss": 2.3075249195098877 }, { "id": "identity_302509", "image": "TQA/images/abc_question_images/parts_skeleton_17270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify this bone.\nChoices:\n(A) C\n(B) J\n(C) U\n(D) W" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.484375, "loss": 3.9642703533172607 }, { "id": "identity_212264", "image": "TextVQA/images/78a5a7902e284852.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of fuel is advertised on the outfield wall?" }, { "from": "gpt", "value": "The answer is Gulf" } ], "clip_score": 23.234375, "loss": 2.509614944458008 }, { "id": "identity_329876", "image": "TQA/images/question_images/optics_lense_types_9162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does this type of lens do to light?\nChoices:\n(A) diffract it\n(B) focus it\n(C) refract it\n(D) reflect it" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.359375, "loss": 4.185992240905762 }, { "id": "identity_278504", "image": "TabMWP/images/15753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among Bert's classmates who disclosed the number of science articles they read, who read the least number of science articles? \n(A) Bert \n(B) Alexa \n(C) Sam \n(D) Janelle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 38.125, "loss": 3.0990471839904785 }, { "id": "identity_2780", "image": "PMC-VQA/images/PMC7788390_F1_336143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the blue lines in the image indicate? \nChoices:\n(A) Wind direction \n(B) Longitude \n(C) Streams \n(D) Currents. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.1875, "loss": 4.452095985412598 }, { "id": "identity_220107", "image": "TextVQA/images/48e0031719e4c803.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this book a first edition?" }, { "from": "gpt", "value": "The answer is Yes, it is a first edition" } ], "clip_score": 20.3125, "loss": 2.09061336517334 }, { "id": "identity_208860", "image": "VQA2.0/images/406282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the water container on the table?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.40625, "loss": 3.5190722942352295 }, { "id": "identity_275973", "image": "A-OKVQA/images/329469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle is the man on the right riding? \nOptions:\n(A) scooter\n(B) quad\n(C) bicycle\n(D) motorcycle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.25, "loss": 4.3280205726623535 }, { "id": "identity_95227", "image": "TabMWP/images/17660.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern continues, what would be the Y value when X is 7?" }, { "from": "gpt", "value": "The answer is 49" } ], "clip_score": 29.828125, "loss": 2.4469780921936035 }, { "id": "identity_235449", "image": "A-OKVQA/images/467256.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific elements in the image indicate that the area beyond the barrier is dangerous?" }, { "from": "gpt", "value": "The answer is Danger! Keep Out\" sign, construction materials, uneven flooring" } ], "clip_score": 26.46875, "loss": 3.443340539932251 }, { "id": "identity_25687", "image": "MapQA/images/map_172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border Indiana , does Kentucky have the highest value ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.25, "loss": 2.659672260284424 }, { "id": "identity_334250", "image": "CLEVR-Math/images/CLEVR_train_057125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 1 small red sphere. How many items are there?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.265625, "loss": 2.050400495529175 }, { "id": "identity_158252", "image": "ScienceQA/images/18872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region did not have any states voting for Douglas?" }, { "from": "gpt", "value": "The answer is West" } ], "clip_score": 26.84375, "loss": 2.8443446159362793 }, { "id": "identity_134265", "image": "DocVQA/images/slcn0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organization is responsible for the publication of this document?" }, { "from": "gpt", "value": "The answer is National Research Council of The National Academies" } ], "clip_score": 30.796875, "loss": 2.041252374649048 }, { "id": "identity_141923", "image": "DocVQA/images/pkpj0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plant species contain caffeine according to the text?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 19.828125, "loss": 2.333753824234009 }, { "id": "identity_30351", "image": "A-OKVQA/images/198120.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What setting does this seem to be?\nChoices:\n(A) school\n(B) office\n(C) library\n(D) home" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.6796875, "loss": 4.609669208526611 }, { "id": "identity_194072", "image": "A-OKVQA/images/496362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any construction work indicated in the image?" }, { "from": "gpt", "value": "The answer is Yes, indicated by the barrier and cone" } ], "clip_score": 25.40625, "loss": 2.83030366897583 }, { "id": "identity_255973", "image": "TabMWP/images/31378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of photographs taken per day over the past 5 days, and how does this compare to the median number of photographs taken per day?" }, { "from": "gpt", "value": "The answer is Average is 3.4, Median is 4" } ], "clip_score": 37.03125, "loss": 2.006073236465454 }, { "id": "identity_147469", "image": "DocVQA/images/ftbf0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the city and state location for Dr. I.K. Vasil's department?" }, { "from": "gpt", "value": "The answer is Gainesville, Florida" } ], "clip_score": 18.140625, "loss": 2.237342357635498 }, { "id": "identity_225371", "image": "PMC-VQA/images/PMC3292556_pone-0032429-g007_128046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical features can be identified in the sagittal sections of a mouse, and how do these features help in determining the species and type of section?" }, { "from": "gpt", "value": "The answer is Mouse brain, spinal cord, sagittal section" } ], "clip_score": 27.25, "loss": 3.46101713180542 }, { "id": "identity_265962", "image": "GeoQA+/images/428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, point D lies on the arc AC of the circumcircle of isosceles triangle ABC, where AB=AC and ∠CAB=56°. What is the measure of angle ADC? \n(A) 116°\n(B) 118°\n(C) 122°\n(D) 126°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.78125, "loss": 2.3904964923858643 }, { "id": "identity_119027", "image": "A-OKVQA/images/260430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many snowboards are visible in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.453125, "loss": 2.4338107109069824 }, { "id": "identity_177160", "image": "TabMWP/images/3703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of lawn mowers sold?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 35.96875, "loss": 3.4409148693084717 }, { "id": "identity_72951", "image": "AI2D/images/3279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the plant cell diagram, which labeled part is the site for protein synthesis?" }, { "from": "gpt", "value": "The answer is Ribosomes" } ], "clip_score": 28.171875, "loss": 1.6327084302902222 }, { "id": "identity_99495", "image": "IconQA/images/74499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of the spinner landing on white?" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 23.3125, "loss": 1.968257188796997 }, { "id": "identity_38531", "image": "PMC-VQA/images/PMC7921762_JDS-22-71-g003.tif_380749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the plane of the cross-sectional view in the preoperative computed tomography?\nChoices:\n(A) Coronal \n(B) Sagittal \n(C) Transverse \n(D) Longitudinal " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.265625, "loss": 4.237465858459473 }, { "id": "identity_248302", "image": "PMC-VQA/images/PMC5946363_F1_313145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the postoperative CT scan, what are the changes in the infarcted tissue and the surrounding brain structures, and how do these changes compare to the preoperative scan?" }, { "from": "gpt", "value": "The answer is Swelling of infarcted tissue and increased mass effect on structures" } ], "clip_score": 33.4375, "loss": 3.533996820449829 }, { "id": "identity_329297", "image": "PMC-VQA/images/PMC6352060_F1_428944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI was used to image this?\nChoices:\n(A) T1-weighted MRI\n(B) DWI MRI\n(C) FLAIR MRI\n(D) T2-weighted MRI" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.03125, "loss": 3.690871477127075 }, { "id": "identity_333515", "image": "A-OKVQA/images/107247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which country does this green and white bus likely operate?\nChoices:\n(A) france\n(B) uk\n(C) germany\n(D) usa" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.296875, "loss": 3.884319305419922 }, { "id": "identity_127639", "image": "PMC-VQA/images/PMC5075973_F2_87014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the MRI sequence used in this image?" }, { "from": "gpt", "value": "The answer is T2-weighted MRI sequence" } ], "clip_score": 34.90625, "loss": 1.6451231241226196 }, { "id": "identity_29067", "image": "PMC-VQA/images/PMC6945265_f1_95060.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain appears to be well separated in the image? \nChoices:\n(A) Thalami \n(B) Mesencephalon \n(C) Septum pellucidum \n(D) Interhemispheric fissure " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.859375, "loss": 4.483381748199463 }, { "id": "identity_43218", "image": "FigureQA/images/19765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest segment in the pie chart?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 29.6875, "loss": 3.0911948680877686 }, { "id": "identity_241201", "image": "DVQA/images/bar_train_00013016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'tape' dataset across all categories?" }, { "from": "gpt", "value": "The answer is 7.5" } ], "clip_score": 31.875, "loss": 2.161156177520752 }, { "id": "identity_63231", "image": "ScienceQA/images/18243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of the objects shown would likely have the highest potential for kinetic energy transfer?" }, { "from": "gpt", "value": "The answer is Pogo stick" } ], "clip_score": 16.171875, "loss": 2.565035581588745 }, { "id": "identity_243513", "image": "ScienceQA/images/17742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and name its capital. Then, list the capitals of the two states directly north and south of it" }, { "from": "gpt", "value": "The answer is Richmond, Annapolis, Raleigh" } ], "clip_score": 29.40625, "loss": 3.680539131164551 }, { "id": "identity_264506", "image": "GeoQA+/images/3176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the degree measure of angle 1 when a set square is positioned on a ruler as shown in the diagram?\nChoices:\n(A) 95°\n(B) 100°\n(C) 105°\n(D) 110°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.5, "loss": 3.3080825805664062 }, { "id": "identity_313687", "image": "GeoQA+/images/4018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 将一个半径为9,圆心角为120°的扇形,卷成一个如图形状的圆锥(无重叠),则这个圆锥底面圆的半径是多少?\n选项:\n(A) 1\n(B) 2\n(C) 3\n(D) π" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.4375, "loss": 2.5208077430725098 }, { "id": "identity_172369", "image": "IconQA/images/66661.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total time difference between when Vijay and Patrick got the newspaper?" }, { "from": "gpt", "value": "The answer is 55 minutes" } ], "clip_score": 19.140625, "loss": 2.0689024925231934 }, { "id": "identity_216417", "image": "VQA2.0/images/66459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of building is on the right side of the image?" }, { "from": "gpt", "value": "The answer is Commercial building" } ], "clip_score": 25.34375, "loss": 2.5926434993743896 }, { "id": "identity_10866", "image": "IconQA/images/71469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is seventh?\nChoices:\n(A) dog\n(B) paw\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 3.966312885284424 }, { "id": "identity_18748", "image": "DVQA/images/bar_train_00109098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.015625, "loss": 3.7419776916503906 }, { "id": "identity_38943", "image": "FigureQA/images/20013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the pie chart does the Coral section represent?" }, { "from": "gpt", "value": "The answer is 33.33%" } ], "clip_score": 32.3125, "loss": 1.1655395030975342 }, { "id": "identity_54925", "image": "FigureQA/images/10678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest portion in the pie chart?" }, { "from": "gpt", "value": "The answer is Firebrick" } ], "clip_score": 36.59375, "loss": 2.6525866985321045 }, { "id": "identity_51147", "image": "DVQA/images/bar_train_00001183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum accuracy value for the algorithm 'cream'?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 30.203125, "loss": 2.9400217533111572 }, { "id": "identity_215533", "image": "PMC-VQA/images/PMC4470509_pone.0128153.g016_397631.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What abnormality can be observed in the left lung?" }, { "from": "gpt", "value": "The answer is There is a white mass in the left lung" } ], "clip_score": 32.21875, "loss": 2.7334964275360107 }, { "id": "identity_67465", "image": "ScienceQA/images/3073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the plate?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 26.453125, "loss": 3.5724709033966064 }, { "id": "identity_85175", "image": "Geometry3K/images/386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC in the triangle?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 34.875, "loss": 1.9080430269241333 }, { "id": "identity_192759", "image": "PMC-VQA/images/PMC3287112_F1_127457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of skull fracture in the image?" }, { "from": "gpt", "value": "The answer is No skull fracture is visible" } ], "clip_score": 26.578125, "loss": 2.576105833053589 }, { "id": "identity_288835", "image": "TabMWP/images/17713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money Abdul requires to purchase both a frying pan and a coffee pot?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 32.0, "loss": 2.998429536819458 }, { "id": "identity_209924", "image": "VQA2.0/images/143507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the license plate number of the bus?" }, { "from": "gpt", "value": "The answer is 57489-MT" } ], "clip_score": 26.765625, "loss": 3.103668212890625 }, { "id": "identity_264078", "image": "ScienceQA/images/18589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is being indicated in the highlighted area?\nChoices:\n(A) Australia\n(B) North America\n(C) Africa\n(D) Asia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.59375, "loss": 3.9859790802001953 }, { "id": "identity_200502", "image": "VQA-AS/images/17424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather condition is represented by the cloud in the sky?" }, { "from": "gpt", "value": "The answer is Partly cloudy" } ], "clip_score": 15.9453125, "loss": 2.8982019424438477 }, { "id": "identity_125891", "image": "VQA-AS/images/15172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bench?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 24.15625, "loss": 3.1681830883026123 }, { "id": "identity_288283", "image": "A-OKVQA/images/345644.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item among the following is the coldest? \n(A) car engine\n(B) snow\n(C) fan\n(D) refrigerator" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.5078125, "loss": 4.2636823654174805 }, { "id": "identity_189451", "image": "VizWiz/images/VizWiz_train_00012568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cap of the shampoo bottle?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 28.4375, "loss": 3.6125881671905518 }, { "id": "identity_145071", "image": "DocVQA/images/kzph0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What university is associated with Dr. W. Clarke Wescoe?" }, { "from": "gpt", "value": "The answer is The University of Kansas" } ], "clip_score": 23.125, "loss": 1.9748198986053467 }, { "id": "identity_268493", "image": "TQA/images/question_images/human_system_digestive_3735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After passing through the oesophagus, where does the food go next?\nChoices:\n(A) liver\n(B) stomach\n(C) pancreas\n(D) large intestine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.75, "loss": 3.788522720336914 }, { "id": "identity_3347", "image": "A-OKVQA/images/550028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What just happened to the ball?\nChoices:\n(A) got hit\n(B) foul ball\n(C) caught\n(D) lost" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.984375, "loss": 4.446103096008301 }, { "id": "identity_202789", "image": "PMC-VQA/images/PMC8044904_f1_417434.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the abnormality highlighted in the MRI image?" }, { "from": "gpt", "value": "The answer is Oval" } ], "clip_score": 32.8125, "loss": 3.3967459201812744 }, { "id": "identity_328386", "image": "A-OKVQA/images/143569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which national museum is nearby?\nChoices:\n(A) bar\n(B) temple\n(C) print\n(D) grafton" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.2578125, "loss": 4.041921138763428 }, { "id": "identity_308714", "image": "VQA-RAD/images/synpic13385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side shows more extensive infiltration?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 20.828125, "loss": 3.9360804557800293 }, { "id": "identity_220558", "image": "A-OKVQA/images/239936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is displayed on the field logo?" }, { "from": "gpt", "value": "The answer is 2001" } ], "clip_score": 19.09375, "loss": 1.6566622257232666 }, { "id": "identity_212734", "image": "VQA-AS/images/11962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tablecloth on the table?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.875, "loss": 4.060790061950684 }, { "id": "identity_92743", "image": "TabMWP/images/30204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of cars sold in the first quarter of the year (January to March)?" }, { "from": "gpt", "value": "The answer is 254" } ], "clip_score": 30.859375, "loss": 2.5727593898773193 }, { "id": "identity_312683", "image": "TabMWP/images/23124.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Carrie owns six dogs and tracks their monthly food consumption. What fraction of the food was consumed by Chance? Simplify your answer." }, { "from": "gpt", "value": "The answer is 15/46" } ], "clip_score": 31.390625, "loss": 2.269517421722412 }, { "id": "identity_257696", "image": "ScienceQA/images/12199.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the left magnet is rotated 90 degrees clockwise, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is attract" } ], "clip_score": 23.296875, "loss": 3.1098456382751465 }, { "id": "identity_756", "image": "VizWiz/images/VizWiz_train_00017303.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is recycle bin" } ], "clip_score": 30.609375, "loss": 2.4986870288848877 }, { "id": "identity_227223", "image": "FigureQA/images/66717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the maximum area under the curve, and what is the difference in the area under the curve between this color and Cadet Blue?" }, { "from": "gpt", "value": "The answer is Green, 1250" } ], "clip_score": 26.28125, "loss": 3.3043007850646973 }, { "id": "identity_332468", "image": "A-OKVQA/images/290196.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why are they skiing on flat terrain?\nChoices:\n(A) cross-country\n(B) beginners\n(C) easier\n(D) safer" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.71875, "loss": 3.5998878479003906 }, { "id": "identity_297514", "image": "A-OKVQA/images/91105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which British sport could this ball potentially be utilized?\nChoices:\n(A) rugby\n(B) soccer\n(C) bowls\n(D) golf" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.28125, "loss": 4.076837062835693 }, { "id": "identity_309481", "image": "Geometry3K/images/1487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine A C.\nChoices:\n(A) 7\n(B) 7 \\sqrt { 3 }\n(C) 14\n(D) 14 \\sqrt { 3 }" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.734375, "loss": 3.6335055828094482 }, { "id": "identity_199165", "image": "PMC-VQA/images/PMC5615764_Fig1_220556.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is visible in the center of the image, surrounded by ribs?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 24.765625, "loss": 4.056412696838379 }, { "id": "identity_208748", "image": "PMC-VQA/images/PMC5585368_Fig8_211838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar measurement at the bottom of the image?" }, { "from": "gpt", "value": "The answer is 50 µm" } ], "clip_score": 19.96875, "loss": 1.9096343517303467 }, { "id": "identity_277786", "image": "VizWiz/images/VizWiz_train_00004446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is kleenex box" } ], "clip_score": 33.375, "loss": 2.622284412384033 }, { "id": "identity_41630", "image": "DVQA/images/bar_train_00049935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the 'taste' category for 'brush'?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.0625, "loss": 2.5952296257019043 }, { "id": "identity_184254", "image": "PMC-VQA/images/PMC6132558_Fig5_366058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical term for the structure indicated by the green arrow?" }, { "from": "gpt", "value": "The answer is Coronary artery" } ], "clip_score": 31.421875, "loss": 2.123486042022705 }, { "id": "identity_248025", "image": "PMC-VQA/images/PMC7031565_os12599-fig-0006_121402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which vertebrae have both a transverse process and a foramen transversarium?" }, { "from": "gpt", "value": "The answer is cervical vertebrae" } ], "clip_score": 27.078125, "loss": 1.8086248636245728 }, { "id": "identity_193677", "image": "A-OKVQA/images/518109.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the laptop protection plan being advertised?" }, { "from": "gpt", "value": "The answer is $99.99" } ], "clip_score": 25.34375, "loss": 1.6742836236953735 }, { "id": "identity_125208", "image": "VQA2.0/images/539317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Cloudy and possibly stormy" } ], "clip_score": 20.453125, "loss": 2.642420768737793 }, { "id": "identity_64857", "image": "ScienceQA/images/18649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What item is common in both Ronald and Tammy's want list?" }, { "from": "gpt", "value": "The answer is water" } ], "clip_score": 31.9375, "loss": 3.32816743850708 }, { "id": "identity_44702", "image": "FigureQA/images/36147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Slate double the value of Deep Pink?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 32.5, "loss": 3.1764044761657715 }, { "id": "identity_223008", "image": "PMC-VQA/images/PMC3590937_f1_190170.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is visible in the center of the image?" }, { "from": "gpt", "value": "The answer is Nasal passages" } ], "clip_score": 27.71875, "loss": 3.140955924987793 }, { "id": "identity_251929", "image": "A-OKVQA/images/98493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activities are most likely to be conducted in this space, considering the age group that mostly utilizes it?" }, { "from": "gpt", "value": "The answer is Classes, sports, clubs, and social events" } ], "clip_score": 19.4375, "loss": 2.9813427925109863 }, { "id": "identity_143845", "image": "MapQA/images/map_525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the states in the Northeast?" }, { "from": "gpt", "value": "The answer is 81" } ], "clip_score": 23.359375, "loss": 2.2476322650909424 }, { "id": "identity_80517", "image": "Geometry3K/images/841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the measures of angles 1, 2, and 3?" }, { "from": "gpt", "value": "The answer is 208 degrees" } ], "clip_score": 34.34375, "loss": 2.4646706581115723 }, { "id": "identity_146952", "image": "DVQA/images/bar_train_00073502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the highest accuracy for the 'day' dataset?" }, { "from": "gpt", "value": "The answer is brain" } ], "clip_score": 33.84375, "loss": 3.850883960723877 }, { "id": "identity_290600", "image": "IconQA/images/12595.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the number displayed?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 16.015625, "loss": 2.503676652908325 }, { "id": "identity_56538", "image": "FigureQA/images/28226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Brown slice occupy more than half of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 34.53125, "loss": 3.3494560718536377 }, { "id": "identity_241218", "image": "VQA2.0/images/246728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators in the image that suggest the car is in motion, and how do they collectively confirm this?" }, { "from": "gpt", "value": "The answer is Indicators include position, blurred background, and traffic flow" } ], "clip_score": 19.046875, "loss": 3.63653826713562 }, { "id": "identity_235483", "image": "FigureQA/images/31285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Medium Orchid and Aqua?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 32.34375, "loss": 1.8114922046661377 }, { "id": "identity_281777", "image": "ScienceQA/images/20692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic describes this object?\nChoices:\n(A) scratchy\n(B) yellow" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.921875, "loss": 4.117183208465576 }, { "id": "identity_155778", "image": "ScienceQA/images/2707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the North pole on the top magnet?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 21.140625, "loss": 3.4791901111602783 }, { "id": "identity_289214", "image": "PMC-VQA/images/PMC8568287_pone.0259245.g001_103616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the results of the histologic examination of the perianal tumors? \nChoices:\n(A) Normal anorectal histology \n(B) Large perianal tumors \n(C) Invasive anal carcinoma \n(D) Colonic ulcers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.0625, "loss": 3.7392807006835938 }, { "id": "identity_207660", "image": "A-OKVQA/images/401132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible doughnuts are there on the conveyor belt?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 29.0, "loss": 2.2619738578796387 }, { "id": "identity_75918", "image": "TQA/images/question_images/dna_6769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of base pairs present in the labeled section D?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.875, "loss": 2.1298491954803467 }, { "id": "identity_333268", "image": "A-OKVQA/images/225850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this individual most likely preparing to do?\nChoices:\n(A) working\n(B) birdwatching\n(C) gymnastics\n(D) jogging" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.109375, "loss": 4.364039897918701 }, { "id": "identity_118295", "image": "Super-CLEVR/images/superCLEVR_new_001736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles have two wheels?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.515625, "loss": 2.6114213466644287 }, { "id": "identity_33622", "image": "VizWiz/images/VizWiz_train_00011516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The last picture I took, you said of strawberry ice cream? This is a Avon book, so it's someone holding an ice cream." }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 34.46875, "loss": 4.178074359893799 }, { "id": "identity_312043", "image": "TQA/images/question_images/hydrocarbons_8091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many hydrogen atoms are in methane?\nChoices:\n(A) 2\n(B) 5\n(C) 4\n(D) 3" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.40625, "loss": 3.3032336235046387 }, { "id": "identity_161923", "image": "Geometry3K/images/1179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the other pair of parallel sides of the parallelogram?" }, { "from": "gpt", "value": "The answer is 21 cm" } ], "clip_score": 36.71875, "loss": 1.7915964126586914 }, { "id": "identity_126718", "image": "VQA-RAD/images/synpic47964.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image showing any signs of pathology?" }, { "from": "gpt", "value": "The answer is No obvious pathology to an untrained eye" } ], "clip_score": 27.375, "loss": 3.5387091636657715 }, { "id": "identity_79085", "image": "Geometry3K/images/1904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AHB?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 33.28125, "loss": 1.8215031623840332 }, { "id": "identity_68615", "image": "ScienceQA/images/18713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the animal doing in the image?" }, { "from": "gpt", "value": "The answer is Brachiating (swinging from tree branches)" } ], "clip_score": 27.71875, "loss": 2.5387957096099854 }, { "id": "identity_190681", "image": "A-OKVQA/images/316441.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the side of the white van?" }, { "from": "gpt", "value": "The answer is ZAI" } ], "clip_score": 20.5, "loss": 3.896127939224243 }, { "id": "identity_81648", "image": "GeoQA+/images/1209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the radius of the circle if AB is 10 units?" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 34.1875, "loss": 1.963068962097168 }, { "id": "identity_7023", "image": "Geometry3K/images/1018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find z.\nChoices:\n(A) 12\n(B) 13\n(C) 14\n(D) 15" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.78125, "loss": 2.8697900772094727 }, { "id": "identity_271635", "image": "Super-CLEVR/images/superCLEVR_new_001380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the other large object that shares the same shape as the purple object?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 16.734375, "loss": 3.657881259918213 }, { "id": "identity_112527", "image": "VQA2.0/images/263639.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of the train on the platform?" }, { "from": "gpt", "value": "The answer is 165136" } ], "clip_score": 26.859375, "loss": 2.1376278400421143 }, { "id": "identity_152874", "image": "ScienceQA/images/7966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states share a border with the highlighted state?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.9375, "loss": 2.2050464153289795 }, { "id": "identity_73862", "image": "TQA/images/question_images/photosynthesis_4130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Through which part of the plant does water enter as indicated by the blue arrow at the bottom?" }, { "from": "gpt", "value": "The answer is Water enters through the roots" } ], "clip_score": 31.484375, "loss": 2.3502607345581055 }, { "id": "identity_290804", "image": "VQA-AS/images/12834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the type of food that the woman is consuming?" }, { "from": "gpt", "value": "The answer is pie" } ], "clip_score": 19.078125, "loss": 3.5524039268493652 }, { "id": "identity_266996", "image": "VizWiz/images/VizWiz_train_00004672.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify this device?" }, { "from": "gpt", "value": "The answer is laptop case" } ], "clip_score": 27.03125, "loss": 3.3184587955474854 }, { "id": "identity_118204", "image": "VQA-AS/images/18491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person doing in the image?" }, { "from": "gpt", "value": "The answer is Sitting" } ], "clip_score": 22.671875, "loss": 3.472984552383423 }, { "id": "identity_276662", "image": "DVQA/images/bar_train_00166388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many algorithms is the accuracy less than 6 in at least one dataset?" }, { "from": "gpt", "value": "The answer is three" } ], "clip_score": 32.875, "loss": 3.3911423683166504 }, { "id": "identity_62112", "image": "DocVQA/images/smwn0226_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the thickness of the layer mentioned under Ion Bombardment?" }, { "from": "gpt", "value": "The answer is 0.05um" } ], "clip_score": 25.984375, "loss": 1.6928997039794922 }, { "id": "identity_186462", "image": "VQA2.0/images/231958.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the kite closest to the ground?" }, { "from": "gpt", "value": "The answer is Blue and green" } ], "clip_score": 25.171875, "loss": 3.8175978660583496 }, { "id": "identity_275275", "image": "CLEVR-Math/images/CLEVR_train_045019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the small rubber cylinders, how many cylinders are remaining?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.765625, "loss": 2.1366846561431885 }, { "id": "identity_217771", "image": "TextVQA/images/95138708aa79803c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different comic book titles are visible on the table?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 22.09375, "loss": 4.004729747772217 }, { "id": "identity_167180", "image": "IconQA/images/34163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the nail including the head?" }, { "from": "gpt", "value": "The answer is 2.5 inches" } ], "clip_score": 33.5625, "loss": 1.5641024112701416 }, { "id": "identity_315945", "image": "PlotQA/images/116269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When was the percentage of revenue from grants the lowest?" }, { "from": "gpt", "value": "The answer is 2001" } ], "clip_score": 28.203125, "loss": 1.8943440914154053 }, { "id": "identity_83399", "image": "UniGeo/images/3085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the triangle is a right-angled triangle and angle 1 is 60 degrees, what is the measure of angle 2?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 33.1875, "loss": 1.6167449951171875 }, { "id": "identity_293369", "image": "A-OKVQA/images/354721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What provides momentum for the skater?\nChoices:\n(A) dog\n(B) gasoline\n(C) petrol\n(D) wheels" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.28125, "loss": 4.158251762390137 }, { "id": "identity_259785", "image": "PMC-VQA/images/PMC3773898_fig4_231613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertebrae are visible in the lumbar spine in Case 2, and what is the total number of vertebrae visible in the entire spine in the image?" }, { "from": "gpt", "value": "The answer is 5 lumbar vertebrae, 29 total vertebrae" } ], "clip_score": 27.8125, "loss": 1.8865009546279907 }, { "id": "identity_114146", "image": "TextVQA/images/9b2524992667fb23.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the woman's dress standing in the room?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 16.890625, "loss": 2.614051580429077 }, { "id": "identity_243068", "image": "GeoQA+/images/3402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that the diagonals of an isosceles trapezoid ABCD intersect at point O, and the midpoints of the sides AB, BC, CD, and DA are E, F, G, and H respectively, if the length of diagonal AC is 10 meters, find the area of quadrilateral EFGH" }, { "from": "gpt", "value": "The answer is 12.5 square meters" } ], "clip_score": 35.53125, "loss": 1.4018436670303345 }, { "id": "identity_114693", "image": "Super-CLEVR/images/superCLEVR_new_000093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of wheels visible in the image?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 18.015625, "loss": 2.1224989891052246 }, { "id": "identity_264428", "image": "VQA-RAD/images/synpic19114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the densities observed in the lung fields?" }, { "from": "gpt", "value": "The answer is pleural plaques" } ], "clip_score": 29.015625, "loss": 4.382754802703857 }, { "id": "identity_27875", "image": "PMC-VQA/images/PMC6398457_BIO036939F3_443767.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the structure indicated by arrows in image C? \nChoices:\n(A) Spermatid heads \n(B) Plasma membranes \n(C) Septa-like pattern \n(D) Cyst Cells " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.25, "loss": 5.083576202392578 }, { "id": "identity_294777", "image": "PlotQA/images/117198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is represented by the 3rd bar from the left in the year 2012?" }, { "from": "gpt", "value": "The answer is Tanzania" } ], "clip_score": 23.734375, "loss": 3.8369269371032715 }, { "id": "identity_189234", "image": "Super-CLEVR/images/superCLEVR_new_001357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.71875, "loss": 2.5525777339935303 }, { "id": "identity_207714", "image": "PMC-VQA/images/PMC3509398_F2_169201.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"D1\" indicate on the image?" }, { "from": "gpt", "value": "The answer is First diagonal branch of the Left Anterior Descending artery" } ], "clip_score": 25.828125, "loss": 2.446309804916382 }, { "id": "identity_317335", "image": "UniGeo/images/1237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB、BC与⊙O相切于点A、B,EF与⊙O相切于点D。若BA=20,则△BEF的周长等于多少?\n选项:\n(A) 20\n(B) 40\n(C) 30\n(D) 60" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.09375, "loss": 2.4171645641326904 }, { "id": "identity_183266", "image": "A-OKVQA/images/329946.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the kite located at the top right corner of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.578125, "loss": 3.044807195663452 }, { "id": "identity_37458", "image": "IconQA/images/83618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) probable\n(B) impossible\n(C) certain\n(D) unlikely" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.953125, "loss": 4.436082363128662 }, { "id": "identity_198919", "image": "PMC-VQA/images/PMC4009138_Fig3_285691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure labeled with the red arrow in the image?" }, { "from": "gpt", "value": "The answer is Trachea" } ], "clip_score": 23.171875, "loss": 3.416212320327759 }, { "id": "identity_45197", "image": "FigureQA/images/76711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis label does Maroon first exceed a value of 40?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 28.71875, "loss": 1.914613127708435 }, { "id": "identity_287839", "image": "PMC-VQA/images/PMC2892667_fig2_67547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is a common observation in both lungs?\nChoices:\n(A) Perfusion defects \n(B) Areas of consolidation \n(C) No abnormalities \n(D) Bronchial constrictions" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.984375, "loss": 4.6719207763671875 }, { "id": "identity_332852", "image": "PMC-VQA/images/PMC8666393_Fig2_137527.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrow pointing to in the image?\nChoices:\n(A) to indicate the presence of lymph nodes \n(B) to indicate the presence of a tumor \n(C) to indicate the presence of a foreign body \n(D) to indicate the presence of inflammation" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.46875, "loss": 4.377132892608643 }, { "id": "identity_197102", "image": "PMC-VQA/images/PMC4385022_Fig5_374884.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teeth are visible in the lower jaw?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 29.40625, "loss": 2.259519338607788 }, { "id": "identity_308678", "image": "AI2D/images/49.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What stage of the butterfly's life cycle is indicated at letter G?\nChoices:\n(A) adult\n(B) larva\n(C) pupa\n(D) egg" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.0625, "loss": 4.353959083557129 }, { "id": "identity_240782", "image": "IconQA/images/51649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines of symmetry does this picture have?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 25.6875, "loss": 2.123171091079712 }, { "id": "identity_95368", "image": "IconQA/images/61099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many suns are there in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.484375, "loss": 2.2893855571746826 }, { "id": "identity_214583", "image": "A-OKVQA/images/184241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tennis racquets are leaning against the bench?" }, { "from": "gpt", "value": "The answer is Two tennis racquets" } ], "clip_score": 28.734375, "loss": 3.1594350337982178 }, { "id": "identity_245837", "image": "A-OKVQA/images/143696.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats are parked along the curb, and what is the color of the building next to the boats?" }, { "from": "gpt", "value": "The answer is 10 boats, brown and white building" } ], "clip_score": 19.46875, "loss": 2.618159770965576 }, { "id": "identity_335055", "image": "IconQA/images/20353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is indicated? Answer using a time word, not a number. It is (_) past seven." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 30.1875, "loss": 5.023888111114502 }, { "id": "identity_77915", "image": "TQA/images/abc_question_images/human_system_excretory_16117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the tubes represented by the label 'U'?" }, { "from": "gpt", "value": "The answer is Ureters" } ], "clip_score": 22.96875, "loss": 2.2809011936187744 }, { "id": "identity_221845", "image": "VQA2.0/images/101713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 13.8515625, "loss": 3.004383087158203 }, { "id": "identity_231080", "image": "FigureQA/images/7995.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what percentage is Blue greater than Orange Red?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 36.84375, "loss": 1.4649274349212646 }, { "id": "identity_334347", "image": "PMC-VQA/images/PMC7566610_Fig7_276673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which lobes are affected in this image?\nChoices:\n(A) Left upper and lower lobes\n(B) Right upper and lower lobes\n(C) Left lower lobe only\n(D) Right middle and lower lobes." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.4375, "loss": 4.287473201751709 }, { "id": "identity_293925", "image": "PMC-VQA/images/PMC1794319_pone-0000234-g001_9340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to analyze the honey bee antenna? \nOptions:\n(A) Transmission electron microscopy \n(B) Confocal microscopy \n(C) Scanning electron microscopy \n(D) Fluorescence microscopy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.359375, "loss": 3.7793915271759033 }, { "id": "identity_268824", "image": "DocVQA/images/njnp0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the quantity of Original Designer Print pieces from the Metropolitan Opera Production that were available?" }, { "from": "gpt", "value": "The answer is 500" } ], "clip_score": 24.34375, "loss": 2.032824754714966 }, { "id": "identity_147202", "image": "PlotQA/images/1603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value of forest rent as percent of GDP?" }, { "from": "gpt", "value": "The answer is 3.2%" } ], "clip_score": 25.0, "loss": 2.0479562282562256 }, { "id": "identity_284448", "image": "TabMWP/images/15949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of fire hydrants recorded on each street in the city?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 40.6875, "loss": 2.2044119834899902 }, { "id": "identity_77883", "image": "AI2D/images/1496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What term is used to describe the flow of water over the land surface towards the ocean?" }, { "from": "gpt", "value": "The answer is Surface runoff" } ], "clip_score": 29.96875, "loss": 2.2017011642456055 }, { "id": "identity_269650", "image": "DVQA/images/bar_train_00122934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is represented by the color steelblue?" }, { "from": "gpt", "value": "The answer is dark" } ], "clip_score": 22.953125, "loss": 3.755840301513672 }, { "id": "identity_38846", "image": "DocVQA/images/phcn0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is this fax addressed?" }, { "from": "gpt", "value": "The answer is James Ueshal" } ], "clip_score": 26.96875, "loss": 3.5528106689453125 }, { "id": "identity_286361", "image": "A-OKVQA/images/54304.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which red food item is the man eating from the following options: apple, pepper, cherry, or tomato?\nChoices:\n(A) apple\n(B) pepper\n(C) cherry\n(D) tomato" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.546875, "loss": 4.045467853546143 }, { "id": "identity_306499", "image": "FigureQA/images/27463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Gray?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.453125, "loss": 3.95185923576355 }, { "id": "identity_211952", "image": "VQA-AS/images/17741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the curtains in the room?" }, { "from": "gpt", "value": "The answer is Red and white" } ], "clip_score": 21.453125, "loss": 3.4821929931640625 }, { "id": "identity_93328", "image": "TabMWP/images/14865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of contestants who played the electric guitar?" }, { "from": "gpt", "value": "The answer is 3 + 3 = 6" } ], "clip_score": 37.125, "loss": 1.6531065702438354 }, { "id": "identity_165729", "image": "GeoQA+/images/4028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In quadrilateral ABCD, with M, N, P as midpoints of AD, BC, and BD respectively, if AB = CD = 8 units, what is the perimeter of triangle MPN?" }, { "from": "gpt", "value": "The answer is 12 units" } ], "clip_score": 35.25, "loss": 1.783383846282959 }, { "id": "identity_140229", "image": "DocVQA/images/nsjl0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the machine engagement?" }, { "from": "gpt", "value": "The answer is 00'27" } ], "clip_score": 18.984375, "loss": 2.912726879119873 }, { "id": "identity_221888", "image": "VQA2.0/images/411111.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car closest to the camera?" }, { "from": "gpt", "value": "The answer is Silver" } ], "clip_score": 19.46875, "loss": 3.320621967315674 }, { "id": "identity_96267", "image": "CLEVR-Math/images/CLEVR_train_009042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cubes are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.390625, "loss": 2.3202643394470215 }, { "id": "identity_327732", "image": "TabMWP/images/29442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each shopping bag contains 5 sweatshirts, how many sweatshirts are there in 6 bags?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 42.0, "loss": 1.6681605577468872 }, { "id": "identity_187639", "image": "TextVQA/images/7892c79ed647a939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the building shown in the image?" }, { "from": "gpt", "value": "The answer is Rose of England" } ], "clip_score": 29.609375, "loss": 3.5273189544677734 }, { "id": "identity_42166", "image": "DVQA/images/bar_train_00026554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the x-axis?" }, { "from": "gpt", "value": "The answer is Values" } ], "clip_score": 27.921875, "loss": 3.270437479019165 }, { "id": "identity_211622", "image": "TextVQA/images/ea263375ec83dcd1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the county mentioned on the sign?" }, { "from": "gpt", "value": "The answer is Clark County" } ], "clip_score": 23.921875, "loss": 3.5471084117889404 }, { "id": "identity_130240", "image": "Super-CLEVR/images/superCLEVR_new_000251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have four wheels?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 25.0625, "loss": 2.7367870807647705 }, { "id": "identity_150571", "image": "ScienceQA/images/4751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the given map of the United States?" }, { "from": "gpt", "value": "The answer is Louisiana" } ], "clip_score": 34.78125, "loss": 3.201758623123169 }, { "id": "identity_126733", "image": "A-OKVQA/images/282042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are visible in the image?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 17.140625, "loss": 4.539875030517578 }, { "id": "identity_269164", "image": "TextVQA/images/e6ab3f563bde9b2a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which month was this issue published?" }, { "from": "gpt", "value": "The answer is september" } ], "clip_score": 15.0625, "loss": 3.701033115386963 }, { "id": "identity_242792", "image": "PMC-VQA/images/PMC8756656_Fig2_163608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristic imaging findings in a patient with COVID-19, and how do they differ from other common pulmonary conditions such as bacterial pneumonia and pulmonary edema?" }, { "from": "gpt", "value": "The answer is Peripheral patchy consolidations and reversed halo sign" } ], "clip_score": 29.0, "loss": 3.138409376144409 }, { "id": "identity_320501", "image": "IconQA/images/69908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a plane. What is the fifth image?\nChoices:\n(A) plane\n(B) bear\n(C) train" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.875, "loss": 3.851478099822998 }, { "id": "identity_311266", "image": "DocVQA/images/mkvw0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what stage is the 'Totelle Combined Monograph' project?" }, { "from": "gpt", "value": "The answer is Under development by DesignWrite based on parthenon draft" } ], "clip_score": 15.8515625, "loss": 5.085654258728027 }, { "id": "identity_34777", "image": "DocVQA/images/fpjh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the place of birth of Kermit W. Beal?" }, { "from": "gpt", "value": "The answer is Hastings, Nebraska" } ], "clip_score": 21.96875, "loss": 1.9629149436950684 }, { "id": "identity_76548", "image": "TQA/images/abc_question_images/parts_skeleton_17271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter corresponds to the Femur?" }, { "from": "gpt", "value": "The answer is K" } ], "clip_score": 26.96875, "loss": 3.8509914875030518 }, { "id": "identity_218704", "image": "A-OKVQA/images/567944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tree is in the center of the image?" }, { "from": "gpt", "value": "The answer is Tropical" } ], "clip_score": 19.109375, "loss": 3.680508852005005 }, { "id": "identity_25107", "image": "DocVQA/images/fqyf0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the document dated?" }, { "from": "gpt", "value": "The answer is 4/17/75" } ], "clip_score": 21.734375, "loss": 1.4763728380203247 }, { "id": "identity_124212", "image": "TextVQA/images/eb0d2e351d5c2594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing a cap in the image?" }, { "from": "gpt", "value": "The answer is At least 21" } ], "clip_score": 18.75, "loss": 2.878676414489746 }, { "id": "identity_327640", "image": "AI2D/images/97.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these insects lives in a colony and doesn't fly?\nChoices:\n(A) Butterfly\n(B) Ant\n(C) Housefly\n(D) Wasp" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.5, "loss": 3.873823881149292 }, { "id": "identity_33046", "image": "PMC-VQA/images/PMC6555215_F1_490264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of fracture is seen in the CT scan? \nChoices:\n(A) Compression fracture \n(B) Burst fracture \n(C) Avulsion fracture \n(D) Stress fracture " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.375, "loss": 4.0260210037231445 }, { "id": "identity_204318", "image": "A-OKVQA/images/371351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible leg bands on the flamingos?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.75, "loss": 2.958003520965576 }, { "id": "identity_180406", "image": "TabMWP/images/19383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of baseballs hit between the highest and lowest hitting days?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 38.0625, "loss": 3.101829767227173 }, { "id": "identity_278326", "image": "VQA-RAD/images/synpic43433.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there an expansion or enlargement of the bowel?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.953125, "loss": 2.9666028022766113 }, { "id": "identity_23003", "image": "VQA2.0/images/429384.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of person would be comfortable in this room?" }, { "from": "gpt", "value": "The answer is kids" } ], "clip_score": 23.71875, "loss": 3.1540889739990234 }, { "id": "identity_246337", "image": "TextVQA/images/29cf515617d0b112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the dates and times of the play \"Dradin, In Love\" at the Granary Theatre, and how much does a ticket cost?" }, { "from": "gpt", "value": "The answer is May 22nd-25th, 8pm, €12" } ], "clip_score": 38.28125, "loss": 1.4005560874938965 }, { "id": "identity_64653", "image": "TQA/images/abc_question_images/human_system_respiratory_16193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the esophagus?" }, { "from": "gpt", "value": "The answer is N" } ], "clip_score": 27.15625, "loss": 3.2302513122558594 }, { "id": "identity_230631", "image": "FigureQA/images/17189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the heights of Dark Orchid and Dark Green, and is this difference greater than the height of Dark Red?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 27.0, "loss": 3.867541551589966 }, { "id": "identity_201268", "image": "PMC-VQA/images/PMC5817300_fig6_277598.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is predominantly featured in this image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 28.40625, "loss": 3.783578395843506 }, { "id": "identity_114380", "image": "A-OKVQA/images/220372.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible Christmas decorations?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.84375, "loss": 2.8671677112579346 }, { "id": "identity_240668", "image": "MapQA/images/map_1787.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage range of manufacturers for Utah and the state directly north of Utah?" }, { "from": "gpt", "value": "The answer is 94.0%-96.6%" } ], "clip_score": 35.71875, "loss": 1.9703601598739624 }, { "id": "identity_272492", "image": "TQA/images/question_images/lewis_dot_diagrams_9132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the number of covalent bonds that Bromine typically forms?\nChoices:\n(A) 1\n(B) 3\n(C) 2\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.546875, "loss": 3.2050235271453857 }, { "id": "identity_112903", "image": "TextVQA/images/1b5a0ae5c15fddf6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shop's main signage?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.625, "loss": 3.5514378547668457 }, { "id": "identity_290946", "image": "TextVQA/images/07cce5fbd1fe2ef9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Whose papaya is this?" }, { "from": "gpt", "value": "The answer is gray's" } ], "clip_score": 15.921875, "loss": 2.3621485233306885 }, { "id": "identity_232008", "image": "IconQA/images/50022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you select two marbles without looking, how likely is it that both will be black?" }, { "from": "gpt", "value": "The answer is certain" } ], "clip_score": 32.59375, "loss": 4.457707405090332 }, { "id": "identity_120548", "image": "A-OKVQA/images/183620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flag is flying on the pole in the background?" }, { "from": "gpt", "value": "The answer is American flag" } ], "clip_score": 19.015625, "loss": 2.8675711154937744 }, { "id": "identity_147974", "image": "PlotQA/images/25011.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend of female unemployment from 2002 to 2007?" }, { "from": "gpt", "value": "The answer is Increasing" } ], "clip_score": 24.765625, "loss": 3.4782464504241943 }, { "id": "identity_147597", "image": "DocVQA/images/qghv0228_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is scheduled for the \"STRETCH BREAK\"?" }, { "from": "gpt", "value": "The answer is No specific time given" } ], "clip_score": 16.71875, "loss": 3.9818525314331055 }, { "id": "identity_253464", "image": "TextVQA/images/26cb1858072bdfc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of public transportation vehicle is shown in the image, and what are the key visual features that help identify it?" }, { "from": "gpt", "value": "The answer is Trolley or tram based on tracks and design features" } ], "clip_score": 30.453125, "loss": 4.248106956481934 }, { "id": "identity_194472", "image": "A-OKVQA/images/79869.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the notebook on the desk?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.6875, "loss": 3.47257661819458 }, { "id": "identity_246573", "image": "IconQA/images/85742.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the current time is 2:00 P.M. and you plan to take a 90-minute nap, what time will it be when you wake up?" }, { "from": "gpt", "value": "The answer is 3:30 P.M" } ], "clip_score": 31.890625, "loss": 0.9785490036010742 }, { "id": "identity_221136", "image": "A-OKVQA/images/84474.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of body of water are the seaplanes on?" }, { "from": "gpt", "value": "The answer is Harbor or Sea (Acceptable answers depending on geographical knowledge)" } ], "clip_score": 32.09375, "loss": 3.9743356704711914 }, { "id": "identity_102907", "image": "TabMWP/images/13692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you have $40, can you afford to buy both a teapot and a serving platter?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 35.40625, "loss": 2.9883456230163574 }, { "id": "identity_258285", "image": "VQA2.0/images/169907.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are visible on the desk and the floor, and what types are they?" }, { "from": "gpt", "value": "The answer is 14 devices: computer monitor, keyboard, mouse, speaker, remote control, CD/DVD drive, modem/router, power strip, phone, computer tower, subwoofer" } ], "clip_score": 31.25, "loss": 1.9076157808303833 }, { "id": "identity_19282", "image": "PMC-VQA/images/PMC3350148_fig3_137475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the right side of the image show? \nChoices:\n(A) Lymphocele \n(B) Vein \n(C) Bladder \n(D) Ureter " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.734375, "loss": 5.167790412902832 }, { "id": "identity_148218", "image": "DocVQA/images/grvv0228_26.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more was spent on Project Expenditures than was earned in Income?" }, { "from": "gpt", "value": "The answer is -1.8" } ], "clip_score": 26.15625, "loss": 2.204148769378662 }, { "id": "identity_297120", "image": "DocVQA/images/lfgn0226_41.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which version is referenced in the document?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.71875, "loss": 3.616270065307617 }, { "id": "identity_326787", "image": "TabMWP/images/21091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: An insurance company compiled the number of doctor visits made by some patients last year. How many patients went to the doctor at least a couple of times?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 31.140625, "loss": 2.5916531085968018 }, { "id": "identity_34588", "image": "IconQA/images/33194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dots are on the frame?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.515625, "loss": 2.4721426963806152 }, { "id": "identity_96644", "image": "IconQA/images/63322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the spinner is green?" }, { "from": "gpt", "value": "The answer is 7/8" } ], "clip_score": 27.46875, "loss": 1.6977403163909912 }, { "id": "identity_25002", "image": "DocVQA/images/rshg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Associate Professor ?" }, { "from": "gpt", "value": "The answer is (Mrs.) Martha N. Lewis" } ], "clip_score": 17.5, "loss": 2.8069143295288086 }, { "id": "identity_241647", "image": "GeoQA+/images/1584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,长方体的长为5,宽为3,高为12,点B离点C的距离为2,一只蚂蚁如果要沿着长方体的表面从点A爬到点B,需要爬行的最短距离是()。请计算蚂蚁从点A爬到点C再爬到点B的总距离。" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 24.34375, "loss": 2.7961063385009766 }, { "id": "identity_142962", "image": "DocVQA/images/nlcn0226_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the document mentioned at the bottom of the page?" }, { "from": "gpt", "value": "The answer is NRDC on OMB BA Bulletin June 2006" } ], "clip_score": 25.046875, "loss": 3.6370584964752197 }, { "id": "identity_2921", "image": "FigureQA/images/93641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange less than Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.421875, "loss": 3.713331699371338 }, { "id": "identity_70169", "image": "TQA/images/question_images/dna_6767.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label for the nucleobase that pairs with Adenine in DNA?" }, { "from": "gpt", "value": "The answer is Thymine (T)" } ], "clip_score": 27.796875, "loss": 1.7427984476089478 }, { "id": "identity_198585", "image": "PMC-VQA/images/PMC3646783_pone-0063430-g001_203142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique is used in this histological image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and eosin (H&E) staining" } ], "clip_score": 31.234375, "loss": 1.0353506803512573 }, { "id": "identity_260061", "image": "PMC-VQA/images/PMC7007391_Figure1_113430.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the common symptoms and diagnostic methods for the medical condition associated with the tumor, and how do they differ from those of the other conditions listed?" }, { "from": "gpt", "value": "The answer is Pneumonia symptoms include cough, fever, chills, and difficulty breathing" } ], "clip_score": 23.9375, "loss": 2.092413902282715 }, { "id": "identity_51980", "image": "DVQA/images/bar_train_00131252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference for the object 'device' between the 'street' and 'while' categories?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.125, "loss": 3.134939193725586 }, { "id": "identity_5999", "image": "PlotQA/images/75577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average population in largest city per country ?" }, { "from": "gpt", "value": "The answer is 7449873.0" } ], "clip_score": 29.875, "loss": 2.823749542236328 }, { "id": "identity_284231", "image": "VQA2.0/images/125263.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the reason for the abundance of boats?" }, { "from": "gpt", "value": "The answer is harbor" } ], "clip_score": 25.828125, "loss": 2.8791658878326416 }, { "id": "identity_127223", "image": "VQA2.0/images/509657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the object tied to the front of the truck?" }, { "from": "gpt", "value": "The answer is A pair of horns" } ], "clip_score": 20.359375, "loss": 2.8940916061401367 }, { "id": "identity_47170", "image": "PlotQA/images/97109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate number of ATMs in Congo (Republic) in the year 2007?" }, { "from": "gpt", "value": "The answer is Approximately 20" } ], "clip_score": 26.1875, "loss": 2.5075180530548096 }, { "id": "identity_18361", "image": "UniGeo/images/1271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD是矩形,AC为⊙O的直径,过点B作⊙O的切线,与AC的延长线交于点P,若AC=10,∠P=30°,则AB的长度是()\nChoices:\n(A) \\frac{5}{2}\n(B) 5\n(C) 5√{2}\n(D) 5√{3}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.34375, "loss": 2.7415051460266113 }, { "id": "identity_308301", "image": "VizWiz/images/VizWiz_train_00004936.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What flavor is this ice cream, please?" }, { "from": "gpt", "value": "The answer is blackberry cobbler" } ], "clip_score": 21.328125, "loss": 2.942814826965332 }, { "id": "identity_203936", "image": "A-OKVQA/images/468548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the aprons worn by the workers?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 18.40625, "loss": 3.543314218521118 }, { "id": "identity_106555", "image": "A-OKVQA/images/209930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many light towers are there in the stadium?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.890625, "loss": 4.13926362991333 }, { "id": "identity_152414", "image": "TQA/images/abc_question_images/human_system_respiratory_16205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label points to the structure that prevents food from entering the respiratory tract?" }, { "from": "gpt", "value": "The answer is J" } ], "clip_score": 32.4375, "loss": 2.7513504028320312 }, { "id": "identity_237051", "image": "VQA-RAD/images/synpic52988.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of consolidation or pleural effusion in addition to the patchy infiltrates?" }, { "from": "gpt", "value": "The answer is Yes, signs of consolidation and pleural effusion are present" } ], "clip_score": 30.890625, "loss": 1.793555736541748 }, { "id": "identity_245344", "image": "A-OKVQA/images/490872.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of items are visible on the toilet tank and the sink counter combined?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.140625, "loss": 2.1818552017211914 }, { "id": "identity_258802", "image": "MapQA/images/map_2291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of youngsters in Iowa, Nebraska, and Kansas combined?" }, { "from": "gpt", "value": "The answer is 1572-1647" } ], "clip_score": 31.15625, "loss": 2.6464810371398926 }, { "id": "identity_94961", "image": "IconQA/images/31004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the pattern continues, what would be the eighth image?" }, { "from": "gpt", "value": "The answer is tractor" } ], "clip_score": 27.421875, "loss": 2.692157030105591 }, { "id": "identity_312182", "image": "VQA2.0/images/432328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are they positioned behind a barrier?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.859375, "loss": 3.831775665283203 }, { "id": "identity_13071", "image": "VQA-RAD/images/synpic39086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there hyper attenuating material along the aortic walls?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.25, "loss": 3.4505527019500732 }, { "id": "identity_248988", "image": "VQA2.0/images/16496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are there in total, and what percentage of them are yellow?" }, { "from": "gpt", "value": "The answer is 50 motorcycles, 2% are yellow" } ], "clip_score": 27.34375, "loss": 2.1377499103546143 }, { "id": "identity_195942", "image": "PMC-VQA/images/PMC3836889_pone-0081317-g005_245505.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any indications of pathology in the tissue shown in the image?" }, { "from": "gpt", "value": "The answer is Possible indications of pathology, but definitive assessment requires context" } ], "clip_score": 31.125, "loss": 3.3404057025909424 }, { "id": "identity_107352", "image": "PMC-VQA/images/PMC6717321_Fig2_30960.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of implant is visible in the radiograph?" }, { "from": "gpt", "value": "The answer is Femoral implant" } ], "clip_score": 32.25, "loss": 3.6265833377838135 }, { "id": "identity_116166", "image": "TextVQA/images/122d892264e2094b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text written at the bottom of the phone?" }, { "from": "gpt", "value": "The answer is CYON" } ], "clip_score": 21.3125, "loss": 2.5101447105407715 }, { "id": "identity_2283", "image": "DocVQA/images/hmxf0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: in which state is Barnes Hospital at ?" }, { "from": "gpt", "value": "The answer is Missouri" } ], "clip_score": 23.953125, "loss": 2.6138787269592285 }, { "id": "identity_112229", "image": "VQA2.0/images/526098.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the items in the stroller?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 22.390625, "loss": 3.1224915981292725 }, { "id": "identity_287336", "image": "TextVQA/images/f279bb5e48c9ea01.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which lottery is being displayed on the screen?" }, { "from": "gpt", "value": "The answer is mega millions" } ], "clip_score": 24.75, "loss": 2.6541335582733154 }, { "id": "identity_218824", "image": "TextVQA/images/67982a19ee1bd0d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cap worn by the person standing next to the cameraperson?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 15.1953125, "loss": 4.029069900512695 }, { "id": "identity_323351", "image": "ScienceQA/images/5944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What describes the Kibale National Forest ecosystem?\nChoices:\n(A) It has many different types of organisms.\n(B) It has mostly small plants." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.140625, "loss": 3.892545700073242 }, { "id": "identity_29242", "image": "TabMWP/images/28055.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.65625, "loss": 4.487978458404541 }, { "id": "identity_150388", "image": "AI2D/images/641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which layer of the Earth is directly beneath the Crust?" }, { "from": "gpt", "value": "The answer is Mantle" } ], "clip_score": 33.3125, "loss": 2.204970121383667 }, { "id": "identity_30530", "image": "DocVQA/images/kzhb0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total direct costs requested for first 12-month period?" }, { "from": "gpt", "value": "The answer is 30,781" } ], "clip_score": 16.5, "loss": 2.4540364742279053 }, { "id": "identity_232505", "image": "GeoQA+/images/2771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠1 = 15° and the right triangle includes a 30° angle, calculate the measure of ∠3, which is the angle adjacent to ∠2 on the opposite side of the right triangle" }, { "from": "gpt", "value": "The answer is 135°" } ], "clip_score": 29.359375, "loss": 1.6163830757141113 }, { "id": "identity_281517", "image": "TextVQA/images/3a19d0affc691e43.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is currently at the plate?" }, { "from": "gpt", "value": "The answer is hidehi matsui" } ], "clip_score": 23.890625, "loss": 3.854316234588623 }, { "id": "identity_233472", "image": "GeoQA+/images/5564.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C、D均在⊙O上,AB=AC,BD是⊙O的直径,连接CD,若∠A=36°,求∠BDC的度数。" }, { "from": "gpt", "value": "The answer is 9°" } ], "clip_score": 28.796875, "loss": 2.8462111949920654 }, { "id": "identity_275051", "image": "FigureQA/images/20072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Turquoise the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.5, "loss": 3.852128505706787 }, { "id": "identity_303179", "image": "ScienceQA/images/5038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What might Cara's test indicate?\nChoices:\n(A) whether producing more insulin would help the bacteria grow faster\n(B) whether she added enough nutrients to help the bacteria produce 20% more insulin\n(C) whether different types of bacteria would need different nutrients to produce insulin" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.828125, "loss": 3.2049026489257812 }, { "id": "identity_166053", "image": "IconQA/images/19157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Carter took twice as long to watch TV as he did to get dressed, how long did he watch TV?" }, { "from": "gpt", "value": "The answer is 10 minutes" } ], "clip_score": 30.34375, "loss": 1.9649707078933716 }, { "id": "identity_248732", "image": "TextVQA/images/1daf09612004fe93.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What types of information are provided on this board, and how are they categorized?" }, { "from": "gpt", "value": "The answer is Maps, advertisements, contact details, and local services" } ], "clip_score": 26.796875, "loss": 3.4353694915771484 }, { "id": "identity_258192", "image": "Super-CLEVR/images/superCLEVR_new_000398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rubber things are either big wagons, large purple objects, or green objects?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.5625, "loss": 2.4652798175811768 }, { "id": "identity_148503", "image": "DocVQA/images/lghx0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the grant number associated with the payment?" }, { "from": "gpt", "value": "The answer is 502" } ], "clip_score": 20.515625, "loss": 3.042387008666992 }, { "id": "identity_221755", "image": "Super-CLEVR/images/superCLEVR_new_001603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the small vehicle to the left of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 24.859375, "loss": 3.1643550395965576 }, { "id": "identity_150871", "image": "ScienceQA/images/15848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any specific minerals present in the rock from the image?" }, { "from": "gpt", "value": "The answer is Amphibole, pyroxene, and plagioclase feldspar" } ], "clip_score": 26.28125, "loss": 1.7705937623977661 }, { "id": "identity_8775", "image": "DVQA/images/bar_train_00060869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bar has the smallest value?" }, { "from": "gpt", "value": "The answer is male" } ], "clip_score": 21.421875, "loss": 3.369962215423584 }, { "id": "identity_261151", "image": "TextVQA/images/78d8b7071cde5fbf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does the word \"soldes\" appear in the entire image?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 23.8125, "loss": 1.8594385385513306 }, { "id": "identity_222244", "image": "VQA2.0/images/467753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 25.015625, "loss": 2.4617838859558105 }, { "id": "identity_22504", "image": "PMC-VQA/images/PMC6831606_Fig8_62082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the colors purple and brown represent in the micrographs? \nChoices:\n(A) Purple represents human cells and brown represents host cells \n(B) Purple represents host cells and brown represents human cells \n(C) Purple represents proteins and brown represents granules \n(D) Purple represents granules and brown represents cells " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.625, "loss": 3.8301634788513184 }, { "id": "identity_1554", "image": "AI2D/images/1784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the Antarctic food web given here, what would happen to the Elephant seal population, when the small-toothed whale population decreases?\nChoices:\n(A) Decreases\n(B) remains the same\n(C) Decreases and Increases\n(D) Increases" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.296875, "loss": 3.174992322921753 }, { "id": "identity_327212", "image": "A-OKVQA/images/120088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many small elephants are near the tree with their families?\nChoices:\n(A) four\n(B) three\n(C) five\n(D) two" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.921875, "loss": 2.9432427883148193 }, { "id": "identity_229243", "image": "UniGeo/images/517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, 圆锥底面半径为 r cm, 母线长为 10 cm, 其侧面展开图是圆心角为 216° 的扇形, 求圆锥的体积。" }, { "from": "gpt", "value": "The answer is 96π cm³" } ], "clip_score": 26.125, "loss": 1.9786090850830078 }, { "id": "identity_36937", "image": "A-OKVQA/images/548957.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color is the round fruit underneath of the apples?\nChoices:\n(A) red\n(B) green\n(C) blue\n(D) orange" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.03125, "loss": 4.004456996917725 }, { "id": "identity_138716", "image": "DVQA/images/bar_train_00168380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm on the 'world' dataset for 'region'?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 37.25, "loss": 2.595578908920288 }, { "id": "identity_69382", "image": "AI2D/images/872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of herbivores and carnivores combined in the food web?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.71875, "loss": 2.376943349838257 }, { "id": "identity_25327", "image": "PMC-VQA/images/PMC8960880_Fig7_237236.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which regions of the implant are shown in higher power views in the panel on the right? \nChoices:\n(A) Head and tail \n(B) Tail and body \n(C) Head and tip \n(D) Body and tip " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.1796875, "loss": 4.151106834411621 }, { "id": "identity_202505", "image": "VQA2.0/images/280241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the sheep's wool?" }, { "from": "gpt", "value": "The answer is Off-white or dirty white" } ], "clip_score": 27.6875, "loss": 3.8653564453125 }, { "id": "identity_18578", "image": "IconQA/images/25871.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this picture have symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.875, "loss": 3.4697303771972656 }, { "id": "identity_275751", "image": "FigureQA/images/51635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Forest Green or Medium Aqua?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.609375, "loss": 3.7777233123779297 }, { "id": "identity_40631", "image": "DVQA/images/bar_train_00015291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'handle' and 'attack' algorithms?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 42.5625, "loss": 2.9435768127441406 }, { "id": "identity_156387", "image": "ScienceQA/images/9861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary feathers can be seen on the bird's wing?" }, { "from": "gpt", "value": "The answer is An integer number, depending on the visibility in the image" } ], "clip_score": 24.859375, "loss": 3.7522783279418945 }, { "id": "identity_269192", "image": "DVQA/images/bar_train_00134889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the bars run horizontally?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 22.03125, "loss": 4.330430030822754 }, { "id": "identity_55509", "image": "FigureQA/images/84025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Light Coral have a consistent value across the graph?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.703125, "loss": 2.942082166671753 }, { "id": "identity_131386", "image": "VQA2.0/images/221888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beach chairs can be seen in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.9375, "loss": 2.584696054458618 }, { "id": "identity_243418", "image": "DocVQA/images/hnfb0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the location where the symposium was held, including the suite name, hotel name, street address, and city?" }, { "from": "gpt", "value": "The answer is TERRACE SUITE, THE HOTEL ROOSEVELT, 45 East 45th Street, New York, New York" } ], "clip_score": 16.96875, "loss": 1.208079218864441 }, { "id": "identity_288130", "image": "PMC-VQA/images/PMC7724780_Fig6_317019.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the results of the CT scan?\nChoices:\n(A) Normal findings \n(B) Multiple peripheral rounded ground-glass opacity (red circle) \n(C) Lung abscess \n(D) Brain tumor" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.625, "loss": 3.945578098297119 }, { "id": "identity_266181", "image": "TabMWP/images/16681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price difference between a silver coat rack and a crystal vase?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 34.375, "loss": 3.7430479526519775 }, { "id": "identity_211578", "image": "PMC-VQA/images/PMC6857121_F1_68934.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the matter that predominantly surrounds the central structure?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.25, "loss": 3.297541856765747 }, { "id": "identity_265173", "image": "ChartQA/images/multi_col_60509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean value of the navy blue bars?" }, { "from": "gpt", "value": "The answer is 24.75" } ], "clip_score": 24.328125, "loss": 2.3332149982452393 }, { "id": "identity_181782", "image": "CLEVR-Math/images/CLEVR_train_002329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more cubes or spheres in the image?" }, { "from": "gpt", "value": "The answer is More cubes" } ], "clip_score": 27.96875, "loss": 2.5203635692596436 }, { "id": "identity_331014", "image": "PMC-VQA/images/PMC6873490_Fig1_72668.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the findings in these pulmonary CT images?\nChoices:\n(A) Single enhanced nodule in the right upper lobe.\n(B) Bilateral flaky blurred shadows with ill-defined boundary.\n(C) Small round lesions in lower lobe.\n(D) Normal pulmonary CT images." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.65625, "loss": 3.986743450164795 }, { "id": "identity_183650", "image": "A-OKVQA/images/511299.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man doing on the boat?" }, { "from": "gpt", "value": "The answer is He is cleaning the boat" } ], "clip_score": 20.828125, "loss": 2.6971280574798584 }, { "id": "identity_91457", "image": "Geometry3K/images/1764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of FG?" }, { "from": "gpt", "value": "The answer is 4.6875" } ], "clip_score": 29.296875, "loss": 2.4095964431762695 }, { "id": "identity_83105", "image": "GeoQA+/images/4883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, with AB = AC = 5 and BC = 3, and MN perpendicular bisecting AB at point N, what is the length of segment ND?" }, { "from": "gpt", "value": "The answer is 4.72" } ], "clip_score": 32.96875, "loss": 2.3505043983459473 }, { "id": "identity_280967", "image": "PMC-VQA/images/PMC7814591_Fig8_345052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does Panel c of the imaging display?\nChoices:\n(A) Left transverse dural sinus \n(B) Absent Enhancement \n(C) Shallow left sigmoid groove \n(D) Small/ hypoplastic left sigmoid dural sinus" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.59375, "loss": 4.230472564697266 }, { "id": "identity_37094", "image": "TextVQA/images/e9d6c10b6c12ee54.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand is the battery?" }, { "from": "gpt", "value": "The answer is samsung" } ], "clip_score": 29.875, "loss": 2.6430838108062744 }, { "id": "identity_196304", "image": "PMC-VQA/images/PMC8319602_F1_22092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organ is predominantly visible on the left side of the image?" }, { "from": "gpt", "value": "The answer is Liver" } ], "clip_score": 29.296875, "loss": 3.774204969406128 }, { "id": "identity_216184", "image": "VQA2.0/images/490959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Overcast" } ], "clip_score": 15.5, "loss": 2.6397666931152344 }, { "id": "identity_324060", "image": "TextVQA/images/e3089de36770108f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which state is Hamilton Watch located?" }, { "from": "gpt", "value": "The answer is pennsylvania " } ], "clip_score": 35.59375, "loss": 4.122373104095459 }, { "id": "identity_263283", "image": "IconQA/images/25559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the clip in terms of cubes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.953125, "loss": 2.3304665088653564 }, { "id": "identity_207664", "image": "PMC-VQA/images/PMC7802110_FIG2_340735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to visualize the structure indicated by the yellow arrow?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 36.4375, "loss": 2.5049819946289062 }, { "id": "identity_279495", "image": "GeoQA+/images/5526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, point O is the center of the circumcircle of triangle ABC, and OB is connected. If angle 1 is 37°, what is the measure of angle 2?\nChoices:\n(A) 52°\n(B) 51°\n(C) 53°\n(D) 50°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 37.375, "loss": 2.4557271003723145 }, { "id": "identity_106368", "image": "Super-CLEVR/images/superCLEVR_new_001596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object directly behind the large blue bus?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 26.6875, "loss": 3.635634660720825 }, { "id": "identity_236776", "image": "ScienceQA/images/18240.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What can Katie and Jerry trade so that Katie gets Jerry's broccoli and almonds, and Jerry gets Katie's tomatoes and oranges?" }, { "from": "gpt", "value": "The answer is Katie trades tomatoes and oranges for broccoli and almonds" } ], "clip_score": 23.984375, "loss": 1.4152544736862183 }, { "id": "identity_110558", "image": "VQA-AS/images/14077.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is hanging on the wall to the left?" }, { "from": "gpt", "value": "The answer is A coat rack" } ], "clip_score": 18.09375, "loss": 3.9166972637176514 }, { "id": "identity_164617", "image": "UniGeo/images/1873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,O是圆心,半径OC⊥弦AB于点D,AB=8,OB=5,求圆的半径OC的长度是多少?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 29.9375, "loss": 2.4082915782928467 }, { "id": "identity_214684", "image": "TextVQA/images/3742f95f789e4f47.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of protective gear is the player closest to the camera wearing?" }, { "from": "gpt", "value": "The answer is Helmet" } ], "clip_score": 18.453125, "loss": 2.3199164867401123 }, { "id": "identity_291276", "image": "PMC-VQA/images/PMC4719561_Fig2_463508.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of imaging is utilized for the preoperative and postoperative scans among the following options: CT scan, X-ray, MRI, or PET scan?\nChoices:\n(A) CT scan \n(B) X-ray \n(C) MRI \n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.703125, "loss": 3.7722105979919434 }, { "id": "identity_307024", "image": "ScienceQA/images/8339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is wet paint a solid or something else?\nChoices:\n(A) a solid\n(B) a liquid" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.859375, "loss": 3.8572723865509033 }, { "id": "identity_273509", "image": "DocVQA/images/hmwn0226_34.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the meaning of the acronym RoM?" }, { "from": "gpt", "value": "The answer is Range of motion" } ], "clip_score": 28.796875, "loss": 2.115525484085083 }, { "id": "identity_3106", "image": "UniGeo/images/1316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知直线AD是⊙O的切线,点A为切点,OD交⊙O于点B,点C在⊙O上,且∠ODA=36°,则∠ACB的度数为()\nChoices:\n(A) 54°\n(B) 36°\n(C) 30°\n(D) 27°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.703125, "loss": 2.475811243057251 }, { "id": "identity_213475", "image": "VizWiz/images/VizWiz_train_00000633.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the letter on the hat?" }, { "from": "gpt", "value": "The answer is T" } ], "clip_score": 26.953125, "loss": 3.0884697437286377 }, { "id": "identity_43159", "image": "FigureQA/images/91176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Orange Red ever go below the value of 50 on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.21875, "loss": 3.0066723823547363 }, { "id": "identity_247599", "image": "DocVQA/images/nzxv0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relationship between Wayne and the person who promised to look over the materials, and what specific task did they promise to complete?" }, { "from": "gpt", "value": "The answer is Larry McGhee promised Wayne to review and correct the materials" } ], "clip_score": 21.703125, "loss": 4.243144989013672 }, { "id": "identity_2155", "image": "A-OKVQA/images/57361.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The store of what US company is found in this street?\nChoices:\n(A) nike\n(B) coach\n(C) apple\n(D) macdonald" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.6796875, "loss": 3.880629062652588 }, { "id": "identity_243206", "image": "ScienceQA/images/16360.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the color of the boxes (white vs. black) affect the rate of temperature increase inside the boxes when left in the sun for different durations?" }, { "from": "gpt", "value": "The answer is Black boxes heat up faster than white boxes" } ], "clip_score": 23.015625, "loss": 2.6520321369171143 }, { "id": "identity_110116", "image": "TextVQA/images/314359e5b2eedc3b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of liquor is contained in the bottle with the cream-colored label?" }, { "from": "gpt", "value": "The answer is Irish Cream Liqueur" } ], "clip_score": 26.515625, "loss": 2.3817200660705566 }, { "id": "identity_54189", "image": "DVQA/images/bar_train_00058359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of people who preferred mark more than twice the number of people who preferred shape?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.3125, "loss": 3.087477207183838 }, { "id": "identity_31187", "image": "ChartQA/images/multi_col_20349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more people thought there was no real change for All Leagues than Football League One?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 27.765625, "loss": 2.075504779815674 }, { "id": "identity_312444", "image": "TabMWP/images/13170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Dr. Copeland, a zoo director, researched ticket prices at other zoos around the country. Which charges less for a senior ticket, the Atlanta Zoo or another zoo?\nChoices:\n(A) Atlanta\n(B) Cleveland" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 38.34375, "loss": 3.7578132152557373 }, { "id": "identity_104217", "image": "PMC-VQA/images/PMC4519440_Fig1_410347.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any medical devices or implants in the image?" }, { "from": "gpt", "value": "The answer is No medical devices or implants" } ], "clip_score": 26.140625, "loss": 3.0502750873565674 }, { "id": "identity_29907", "image": "A-OKVQA/images/37932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: As it is walked by the woman what is inside of the dog's mouth?\nChoices:\n(A) umbrella\n(B) frisbee\n(C) bone\n(D) stick" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.984375, "loss": 4.1263346672058105 }, { "id": "identity_38247", "image": "IconQA/images/20731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many circles are there?\nChoices:\n(A) 4\n(B) 7\n(C) 10\n(D) 5\n(E) 9" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.5, "loss": 3.269648790359497 }, { "id": "identity_317370", "image": "DocVQA/images/hrbn0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the invoice number provided?" }, { "from": "gpt", "value": "The answer is ACCTCESystemJul16" } ], "clip_score": 24.6875, "loss": 6.062402248382568 }, { "id": "identity_276042", "image": "PMC-VQA/images/PMC3630103_ppat-1003312-g005_199699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the meaning of the yellow and green arrows shown in the images?\nChoices:\n(A) Regions of inflammation and normal joints \n(B) CHIKV infected and normal joints \n(C) Treatment and control groups \n(D) Regions of inflammation and viruses" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.140625, "loss": 4.127091407775879 }, { "id": "identity_70001", "image": "ScienceQA/images/8653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the temperature of the grill was consistent, what could be a reason for the difference in final temperatures of the salmon?" }, { "from": "gpt", "value": "The answer is Grill hotspots or uneven salmon thickness" } ], "clip_score": 27.390625, "loss": 3.8064043521881104 }, { "id": "identity_205472", "image": "VQA2.0/images/438335.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of terrain is the man standing on?" }, { "from": "gpt", "value": "The answer is Rocky terrain" } ], "clip_score": 20.078125, "loss": 2.9464590549468994 }, { "id": "identity_142786", "image": "DVQA/images/bar_train_00170205.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store has the highest number of units sold for the item represented by the orange bar?" }, { "from": "gpt", "value": "The answer is focus" } ], "clip_score": 32.46875, "loss": 4.613863945007324 }, { "id": "identity_242589", "image": "ScienceQA/images/9469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and list its neighboring countries shown on the map" }, { "from": "gpt", "value": "The answer is Cuba, The Bahamas, Haiti, Jamaica" } ], "clip_score": 34.40625, "loss": 2.101844310760498 }, { "id": "identity_274006", "image": "TQA/images/abc_question_images/human_system_reproductory_17028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which organ in the reproductive system is represented by the label E?\n(A) URETHRA\n(B) VAS DEFERENS\n(C) TESTIS\n(D) PROSTATE GLAND" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.59375, "loss": 3.8881149291992188 }, { "id": "identity_282872", "image": "A-OKVQA/images/538230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which types of vehicles are participating in the race? Are they:\n(A) skateboards\n(B) tricycles\n(C) tractors\n(D) dirt bikes" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.75, "loss": 3.5595383644104004 }, { "id": "identity_6927", "image": "ScienceQA/images/17878.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the table, what did Ruth Handler invent?\nChoices:\n(A) the Rubik's Cube\n(B) the Barbie doll" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.796875, "loss": 3.4652950763702393 }, { "id": "identity_225596", "image": "TQA/images/abc_question_images/seasons_12588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the position of the Earth during the summer solstice, and what is the angle of the Earth's axial tilt at this position?" }, { "from": "gpt", "value": "The answer is R, 23.5 degrees" } ], "clip_score": 31.96875, "loss": 2.3451788425445557 }, { "id": "identity_165706", "image": "GeoQA+/images/3174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of triangle ABC is 15 square units before the rotation, what is the area of triangle ADE after the rotation?" }, { "from": "gpt", "value": "The answer is 15 square units" } ], "clip_score": 30.28125, "loss": 1.5189118385314941 }, { "id": "identity_108265", "image": "A-OKVQA/images/124739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sign with the message about Sept. 29?" }, { "from": "gpt", "value": "The answer is The sign is yellow" } ], "clip_score": 22.34375, "loss": 3.2571027278900146 }, { "id": "identity_148433", "image": "DocVQA/images/mtkg0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the volume number for the citation titled \"Effect of ionizing radiations on vitamin K\"?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 17.515625, "loss": 2.38472580909729 }, { "id": "identity_64983", "image": "ScienceQA/images/1358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the east of the highlighted areas?" }, { "from": "gpt", "value": "The answer is The Atlantic Ocean" } ], "clip_score": 27.109375, "loss": 2.913954257965088 }, { "id": "identity_113368", "image": "VQA2.0/images/358781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car parked behind the bus?" }, { "from": "gpt", "value": "The answer is Silver or grey" } ], "clip_score": 20.890625, "loss": 3.579650640487671 }, { "id": "identity_279153", "image": "DocVQA/images/kzph0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who currently holds the position of president at Union National Bank?" }, { "from": "gpt", "value": "The answer is clarence coleman" } ], "clip_score": 24.296875, "loss": 2.699213981628418 }, { "id": "identity_36749", "image": "PlotQA/images/38553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars are there on the 4th tick from the bottom ?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 16.640625, "loss": 2.3892366886138916 }, { "id": "identity_272308", "image": "PMC-VQA/images/PMC6675056_pone.0220380.g001_17388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the white arrows in column 1 represent?\nChoices:\n(A) a tumorous mass \n(B) a blood vessel \n(C) a nasal septum \n(D) a maxillary sinus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.515625, "loss": 4.7520575523376465 }, { "id": "identity_44858", "image": "FigureQA/images/66695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines on the graph reach or exceed a value of 55 on the y-axis?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 19.921875, "loss": 2.435835599899292 }, { "id": "identity_139698", "image": "PlotQA/images/104334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the unit of measurement for the total reserves?" }, { "from": "gpt", "value": "The answer is Current US$" } ], "clip_score": 19.359375, "loss": 4.188933849334717 }, { "id": "identity_163923", "image": "GeoQA+/images/4270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the length of AD in the parallelogram ABCD is twice the length of AE, what is the length of AD?" }, { "from": "gpt", "value": "The answer is 8cm" } ], "clip_score": 30.359375, "loss": 2.759371042251587 }, { "id": "identity_103773", "image": "TextVQA/images/00e2e61cc0b8b5f5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the group performing the Tari Kecak as per the poster?" }, { "from": "gpt", "value": "The answer is Sekar Tampaksiring" } ], "clip_score": 39.21875, "loss": 3.4817168712615967 }, { "id": "identity_278898", "image": "PMC-VQA/images/PMC5619555_f3_221675.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which view is the 3D brain model seen from the bottom?\nChoices:\n(A) View A\n(B) View B\n(C) View C\n(D) View D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.015625, "loss": 4.381042957305908 }, { "id": "identity_61607", "image": "DocVQA/images/ffcc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company mentioned?" }, { "from": "gpt", "value": "The answer is Campbell Soup Company" } ], "clip_score": 14.3828125, "loss": 4.153589248657227 }, { "id": "identity_277233", "image": "AI2D/images/515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If another animal that preys on mice enters the community, which of the following is the most likely outcome based on the partial food web provided? \nChoices:\n(A) The beetle population will increase.\n(B) The fox population will increase.\n(C) The frog population will increase.\n(D) The grasshopper population will decrease." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.8125, "loss": 3.3508808612823486 }, { "id": "identity_337749", "image": "DocVQA/images/xzxg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the designation of this person?" }, { "from": "gpt", "value": "The answer is Hospital Research Associate" } ], "clip_score": 18.9375, "loss": 2.5387587547302246 }, { "id": "identity_156810", "image": "ScienceQA/images/1538.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects displayed in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 27.140625, "loss": 1.8883981704711914 }, { "id": "identity_12239", "image": "IconQA/images/31783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a house. Which picture is eighth?\nChoices:\n(A) paw\n(B) dog\n(C) house" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.671875, "loss": 4.626803398132324 }, { "id": "identity_228058", "image": "Geometry3K/images/1867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x + y \\)" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.828125, "loss": 2.5378291606903076 }, { "id": "identity_60054", "image": "DVQA/images/bar_train_00114788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value for the \"manner\" category across all groups?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 19.4375, "loss": 2.6620242595672607 }, { "id": "identity_23266", "image": "DVQA/images/bar_train_00179420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third stack of bars from the left?" }, { "from": "gpt", "value": "The answer is peace" } ], "clip_score": 21.46875, "loss": 4.3259782791137695 }, { "id": "identity_218638", "image": "A-OKVQA/images/136323.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many boats have visible red structures on them?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 25.9375, "loss": 2.515611410140991 }, { "id": "identity_256240", "image": "A-OKVQA/images/136932.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the attire and food items visible in the image, identify the country where this festival is most prominently celebrated and name two traditional elements associated with this festival" }, { "from": "gpt", "value": "The answer is Germany, Lederhosen, Pretzels" } ], "clip_score": 30.28125, "loss": 2.4952011108398438 }, { "id": "identity_251875", "image": "A-OKVQA/images/132760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What event is taking place, and what role does the man in the suit and hat play in it?" }, { "from": "gpt", "value": "The answer is A horse show, and he is judging horses" } ], "clip_score": 28.546875, "loss": 3.3217527866363525 }, { "id": "identity_227184", "image": "TextVQA/images/e874aec4a036fb1f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the main color of the drink banner in the background, and what is the name of the stadium where this event is taking place?" }, { "from": "gpt", "value": "The answer is white, Emirates Stadium" } ], "clip_score": 30.125, "loss": 2.98940372467041 }, { "id": "identity_304363", "image": "PMC-VQA/images/PMC6518670_Fig2_480121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow line in the image indicate? \nChoices:\n(A) The color of the atlas scan \n(B) The cutting plane of velocity encoded MR scan \n(C) The angle of the slice \n(D) The type of MRI scan used for imaging the brain." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.515625, "loss": 4.110815048217773 }, { "id": "identity_323868", "image": "TabMWP/images/9795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A car dealership tracked the number of cars sold each month. What is the range of these numbers?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.90625, "loss": 3.6536056995391846 }, { "id": "identity_126570", "image": "A-OKVQA/images/571842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any non-umbrella item with a floral pattern?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.671875, "loss": 3.3195221424102783 }, { "id": "identity_5949", "image": "VQA2.0/images/470211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you easily get sunburn in this setting?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.21875, "loss": 3.2997353076934814 }, { "id": "identity_227583", "image": "FigureQA/images/3009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Cornflower and Orange?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 33.21875, "loss": 1.8913624286651611 }, { "id": "identity_136308", "image": "ChartQA/images/9598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the percentages of Republicans and Independents who think using military force is the best way to defeat terrorism?" }, { "from": "gpt", "value": "The answer is 115%" } ], "clip_score": 38.5, "loss": 1.9042680263519287 }, { "id": "identity_289192", "image": "ChartQA/images/6530.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many instances has the satisfaction rate exceeded 25?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 15.6953125, "loss": 2.8984506130218506 }, { "id": "identity_17525", "image": "VizWiz/images/VizWiz_train_00015853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of this glasses?" }, { "from": "gpt", "value": "The answer is brown" } ], "clip_score": 31.890625, "loss": 3.9465560913085938 }, { "id": "identity_299001", "image": "FigureQA/images/96174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the least valuable option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.234375, "loss": 3.8600497245788574 }, { "id": "identity_9968", "image": "VQA-AS/images/14911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is she doing?" }, { "from": "gpt", "value": "The answer is yoga" } ], "clip_score": 25.796875, "loss": 2.430187702178955 }, { "id": "identity_11686", "image": "VQA-AS/images/15368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a living room dining room combo?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 30.125, "loss": 3.356790065765381 }, { "id": "identity_94147", "image": "IconQA/images/103100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 2 tennis balls from the top row, how many tennis balls will be left in total?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 34.75, "loss": 2.5491631031036377 }, { "id": "identity_126172", "image": "VQA2.0/images/495633.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people wearing a red shirt in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 12.21875, "loss": 3.3624541759490967 }, { "id": "identity_231937", "image": "GeoQA+/images/5132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⁀{BC}=⁀{CD}=⁀{DE},已知AB是⊙O的直径,∠BOC=40°,求∠AOD与∠AOE的和是多少度?" }, { "from": "gpt", "value": "The answer is 200°" } ], "clip_score": 32.6875, "loss": 2.12951397895813 }, { "id": "identity_174199", "image": "TabMWP/images/5513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the author write the least number of pages?" }, { "from": "gpt", "value": "The answer is Saturday" } ], "clip_score": 34.9375, "loss": 3.0490708351135254 }, { "id": "identity_25977", "image": "AI2D/images/592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many stages are in this complete metamorphosis?\nChoices:\n(A) 2\n(B) 3\n(C) 5\n(D) 4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.65625, "loss": 2.4904961585998535 }, { "id": "identity_312970", "image": "CLEVR-Math/images/CLEVR_train_028705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all metal objects. Remove all blue things. How many objects remain?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.984375, "loss": 2.4495913982391357 }, { "id": "identity_295469", "image": "TabMWP/images/33314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the data provided in the table by the financial aid office at Richmond University regarding the number of students receiving scholarships, what was the percentage change between 2017 and 2018?" }, { "from": "gpt", "value": "The answer is -1" } ], "clip_score": 33.75, "loss": 3.7151553630828857 }, { "id": "identity_319843", "image": "TabMWP/images/25680.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Check out the schedule provided. How much time is needed to travel from Wildgrove to Newberry?\nChoices:\n(A) 1 hour and 30 minutes\n(B) 2 hours and 15 minutes\n(C) 1 hour and 45 minutes\n(D) 2 hours" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.6875, "loss": 3.0531914234161377 }, { "id": "identity_289628", "image": "CLEVR-Math/images/CLEVR_train_002194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the tiny blue cubes and then all the blue cubes, how many objects remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.6875, "loss": 1.9248167276382446 }, { "id": "identity_241627", "image": "DVQA/images/bar_train_00114935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are preferred by less than 4 people in all categories combined?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 30.140625, "loss": 3.0344078540802 }, { "id": "identity_327990", "image": "Geometry3K/images/301.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x. Round to the nearest tenth.\nChoices:\n(A) 2.6\n(B) 5.2\n(C) 10.4\n(D) 27.2" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.09375, "loss": 3.335176944732666 }, { "id": "identity_53476", "image": "MapQA/images/map_5798.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more states with a children number range of 93-97 than 103-107?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 33.46875, "loss": 3.1977531909942627 }, { "id": "identity_324142", "image": "CLEVR-Math/images/CLEVR_train_064630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add one green metallic cylinder. How many green metallic cylinders are there now?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.203125, "loss": 2.2666940689086914 }, { "id": "identity_182405", "image": "PMC-VQA/images/PMC5650637_F3_230005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the imaging technique used to capture the image of the right hind foot?" }, { "from": "gpt", "value": "The answer is MRI (Magnetic Resonance Imaging)" } ], "clip_score": 31.4375, "loss": 1.2778724431991577 }, { "id": "identity_105338", "image": "A-OKVQA/images/563730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the green object a single-use or reusable utensil?" }, { "from": "gpt", "value": "The answer is It appears to be a single-use utensil" } ], "clip_score": 32.09375, "loss": 2.2032456398010254 }, { "id": "identity_157824", "image": "ScienceQA/images/19004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the final temperature between the vial on the counter and the vial in the refrigerator?" }, { "from": "gpt", "value": "The answer is 1.9°C" } ], "clip_score": 31.078125, "loss": 2.0990827083587646 }, { "id": "identity_169014", "image": "TabMWP/images/37672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy one of each item, how much will you spend in total?" }, { "from": "gpt", "value": "The answer is $28" } ], "clip_score": 24.671875, "loss": 2.210784673690796 }, { "id": "identity_233445", "image": "DVQA/images/bar_train_00163452.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage value for the 'finger' category across all four types (mercy, fight, relief, and temple)?" }, { "from": "gpt", "value": "The answer is 100%" } ], "clip_score": 29.953125, "loss": 1.5069254636764526 }, { "id": "identity_305386", "image": "TQA/images/abc_question_images/human_system_muscular_16161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is labeled as N in this diagram of the human muscular system?\nChoices:\n(A) FRONTALIS\n(B) GRACILIS\n(C) SARTORIUS\n(D) DELTOID" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.6875, "loss": 4.001673221588135 }, { "id": "identity_129128", "image": "TextVQA/images/9b0e803a144e8bb0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the action being performed by the player in the red jersey?" }, { "from": "gpt", "value": "The answer is Heading the ball" } ], "clip_score": 21.21875, "loss": 2.9914321899414062 }, { "id": "identity_97444", "image": "TabMWP/images/20989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of hours Samuel worked per day in the past 5 days?" }, { "from": "gpt", "value": "The answer is 3.8 hours" } ], "clip_score": 29.5625, "loss": 1.8922920227050781 }, { "id": "identity_144151", "image": "DocVQA/images/fkng0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the fax number of the recipient?" }, { "from": "gpt", "value": "The answer is 9-011-81-355-721-464" } ], "clip_score": 24.484375, "loss": 2.6232802867889404 }, { "id": "identity_171689", "image": "TabMWP/images/38345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of math teachers in all the high schools combined?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 31.703125, "loss": 2.058065176010132 }, { "id": "identity_217445", "image": "PMC-VQA/images/PMC4312691_F1_354340.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we identify the presence of dental fillings in this image?" }, { "from": "gpt", "value": "The answer is No dental fillings present" } ], "clip_score": 25.984375, "loss": 3.5979723930358887 }, { "id": "identity_320776", "image": "A-OKVQA/images/442685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being relied on to hold them up?\nChoices:\n(A) two horses\n(B) person\n(C) board\n(D) string" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.8515625, "loss": 4.77589225769043 }, { "id": "identity_88215", "image": "Geometry3K/images/1644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side y in the triangle?" }, { "from": "gpt", "value": "The answer is y ≈ 13.44" } ], "clip_score": 36.40625, "loss": 2.512287139892578 }, { "id": "identity_7082", "image": "FigureQA/images/75328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Green the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.140625, "loss": 3.790950059890747 }, { "id": "identity_319415", "image": "GeoQA+/images/2818.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点P为O外一点,PA为⊙O的切线,A为切点,PO交⊙O于点B。若∠P=30°,OB=3,则线段AP的长为多少?\n\n选项:\n(A) 6√{6}\n(B) 3√{3}\n(C) 6\n(D) 3" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.59375, "loss": 2.245420455932617 }, { "id": "identity_172677", "image": "TabMWP/images/14929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Suzie has $200, can she afford to buy the program signed by a famous dancer and the book signed by a famous author?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.609375, "loss": 2.929539203643799 }, { "id": "identity_144442", "image": "DocVQA/images/hlmf0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a source mentioned on the document?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.5546875, "loss": 3.248323917388916 }, { "id": "identity_19769", "image": "VQA2.0/images/241886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are flying?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.4375, "loss": 2.651686668395996 }, { "id": "identity_319678", "image": "A-OKVQA/images/399512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many minutes can someone legally park here?\nChoices:\n(A) eighty\n(B) sixty\n(C) thirty\n(D) seventy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.5, "loss": 3.0125784873962402 }, { "id": "identity_131739", "image": "A-OKVQA/images/196341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 7 people" } ], "clip_score": 20.703125, "loss": 2.3919832706451416 }, { "id": "identity_252291", "image": "PMC-VQA/images/PMC5121968_Fig1_98781.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the anatomical structures visible in the midline sagittal image of the knee joint and describe their functions" }, { "from": "gpt", "value": "The answer is Femur, Tibia, Patella, Menisci, Ligaments" } ], "clip_score": 25.6875, "loss": 2.1320159435272217 }, { "id": "identity_336209", "image": "GeoQA+/images/4629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,菱形ABCD的对角线AC,BD相交于点O,E为DC的中点,若菱形的周长为16,OE的长度为()\n选项:\n(A) 2\n(B) 1\n(C) 4\n(D) 3" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.015625, "loss": 2.5398755073547363 }, { "id": "identity_27807", "image": "A-OKVQA/images/226594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color jacket is the leftmost person wearing?\nChoices:\n(A) black\n(B) purple\n(C) blue\n(D) green" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.203125, "loss": 4.257970809936523 }, { "id": "identity_290098", "image": "DVQA/images/bar_train_00199504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which element is symbolized by the color green?" }, { "from": "gpt", "value": "The answer is bench" } ], "clip_score": 23.34375, "loss": 2.529834508895874 }, { "id": "identity_223075", "image": "PMC-VQA/images/PMC4494931_F1_404044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the anatomy is predominantly featured in this image?" }, { "from": "gpt", "value": "The answer is Brain" } ], "clip_score": 27.515625, "loss": 3.0266523361206055 }, { "id": "identity_286815", "image": "TextVQA/images/ed15edb180c89ed2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During which month does the show occur?" }, { "from": "gpt", "value": "The answer is mayo" } ], "clip_score": 12.5, "loss": 4.522615909576416 }, { "id": "identity_229892", "image": "A-OKVQA/images/412767.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the type of coffee container in the background and describe two key features that distinguish it from the other types listed" }, { "from": "gpt", "value": "The answer is French press with plunger and mesh filter" } ], "clip_score": 14.1796875, "loss": 3.0821943283081055 }, { "id": "identity_213617", "image": "A-OKVQA/images/70927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of barrier is separating the animals from the background area?" }, { "from": "gpt", "value": "The answer is A white horizontal bar fence" } ], "clip_score": 22.859375, "loss": 4.414732933044434 }, { "id": "identity_259979", "image": "TabMWP/images/11072.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does Jill need to buy a python and a cockatiel compared to buying an alpaca and a parrot?" }, { "from": "gpt", "value": "The answer is 1,462" } ], "clip_score": 28.828125, "loss": 2.744361639022827 }, { "id": "identity_219522", "image": "PMC-VQA/images/PMC5187572_brainsci-06-00058-f005_108181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measurement unit used for the MRI coordinates in the image?" }, { "from": "gpt", "value": "The answer is millimeters (mm)" } ], "clip_score": 29.578125, "loss": 1.7912206649780273 }, { "id": "identity_235893", "image": "IconQA/images/35368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the positions of all the planes in the sequence?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 14.2109375, "loss": 2.023500919342041 }, { "id": "identity_104244", "image": "VQA-AS/images/7099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a cloud in the sky?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.53125, "loss": 3.226466655731201 }, { "id": "identity_30762", "image": "MapQA/images/map_9799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Among the states that border West Virginia , which have the lowest value ?" }, { "from": "gpt", "value": "The answer is Maryland" } ], "clip_score": 27.171875, "loss": 2.725468873977661 }, { "id": "identity_12882", "image": "GeoQA+/images/4786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,∠A=90°,AB=4,BC=5,则\\cosB等于()\nChoices:\n(A) \\frac{4}{5}\n(B) \\frac{3}{5}\n(C) \\frac{4}{3}\n(D) \\frac{3}{4}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.84375, "loss": 2.4646718502044678 }, { "id": "identity_207406", "image": "VQA2.0/images/561714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many kites are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.984375, "loss": 2.4262773990631104 }, { "id": "identity_155683", "image": "TQA/images/question_images/convection_of_air_8046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What causes the air to rise as shown by the red arrows?" }, { "from": "gpt", "value": "The answer is Heat from the sun" } ], "clip_score": 35.75, "loss": 2.5777547359466553 }, { "id": "identity_214931", "image": "PMC-VQA/images/PMC8971305_fig2_241135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of sinusitis in this image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 28.96875, "loss": 2.783069372177124 }, { "id": "identity_172845", "image": "TabMWP/images/9560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of people on the bus for the given week?" }, { "from": "gpt", "value": "The answer is 91.857" } ], "clip_score": 28.6875, "loss": 2.66324520111084 }, { "id": "identity_102386", "image": "TabMWP/images/13068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Mandy buys twice as much Kalamata olives as Spanish olives and she buys 1 kilogram of Spanish olives, how much does she spend on Kalamata olives?" }, { "from": "gpt", "value": "The answer is $6" } ], "clip_score": 33.875, "loss": 2.691387176513672 }, { "id": "identity_312373", "image": "TabMWP/images/20923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Each pond has 2 frogs. How many frogs are in some ponds?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 35.75, "loss": 2.278120756149292 }, { "id": "identity_163300", "image": "GeoQA+/images/299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle APF?" }, { "from": "gpt", "value": "The answer is √(25/4)" } ], "clip_score": 33.46875, "loss": 2.193586826324463 }, { "id": "identity_62665", "image": "DocVQA/images/msyh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What campaign year(s) does the report cover?" }, { "from": "gpt", "value": "The answer is CAMPAIGN 1950-1951" } ], "clip_score": 24.3125, "loss": 1.8705096244812012 }, { "id": "identity_36154", "image": "AI2D/images/1955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A part of the food web is shown below. What does Copepods feed on?\nChoices:\n(A) Microzooplankton\n(B) Phytoplankton\n(C) None of the above\n(D) Both of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.71875, "loss": 3.6005959510803223 }, { "id": "identity_168119", "image": "IconQA/images/70404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you were to cut the ruler at the 5 cm mark, what would be the length of the remaining piece from the cut to the end of the ruler?" }, { "from": "gpt", "value": "The answer is 5 centimeters" } ], "clip_score": 33.75, "loss": 1.7235249280929565 }, { "id": "identity_331199", "image": "TabMWP/images/15890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tanks have fewer than 100 fish?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 26.234375, "loss": 2.43082594871521 }, { "id": "identity_260362", "image": "FigureQA/images/24319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Dark Turquoise and Light Seafoam, and how does it compare to the percentage of Turquoise?" }, { "from": "gpt", "value": "The answer is They are equal" } ], "clip_score": 39.5, "loss": 4.701560974121094 }, { "id": "identity_211386", "image": "A-OKVQA/images/457922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the logo on the umpire's shirt?" }, { "from": "gpt", "value": "The answer is MLB logo" } ], "clip_score": 21.46875, "loss": 3.469496011734009 }, { "id": "identity_151607", "image": "ScienceQA/images/15489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the pineapple is typically consumed?" }, { "from": "gpt", "value": "The answer is The flesh or the inner part" } ], "clip_score": 33.21875, "loss": 3.2049033641815186 }, { "id": "identity_50581", "image": "FigureQA/images/72296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of Periwinkle at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 25.34375, "loss": 2.173433303833008 }, { "id": "identity_6332", "image": "VQA2.0/images/423184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What two colors are the flowers in the vase?" }, { "from": "gpt", "value": "The answer is purple and blue" } ], "clip_score": 30.09375, "loss": 2.129441022872925 }, { "id": "identity_117522", "image": "A-OKVQA/images/220390.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 15.3828125, "loss": 4.481432914733887 }, { "id": "identity_217827", "image": "A-OKVQA/images/111525.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is directly behind the cart?" }, { "from": "gpt", "value": "The answer is A shop or market stall" } ], "clip_score": 22.5, "loss": 3.495119094848633 }, { "id": "identity_157979", "image": "ScienceQA/images/17915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the image?" }, { "from": "gpt", "value": "The answer is New Mexico" } ], "clip_score": 34.4375, "loss": 2.80245304107666 }, { "id": "identity_301689", "image": "PMC-VQA/images/PMC7178307_materials-13-01641-f005_157394.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of imaging technique is depicted in this study?\nChoices:\n(A) Transmission electron microscopy \n(B) Scanning electron microscopy \n(C) Fluorescence microscopy \n(D) Confocal microscopy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.84375, "loss": 3.9635303020477295 }, { "id": "identity_88202", "image": "UniGeo/images/3022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AC is the diameter of circle O and ∠BAC is 35°, what is the measure of ∠BOC?" }, { "from": "gpt", "value": "The answer is 110°" } ], "clip_score": 34.8125, "loss": 2.1618049144744873 }, { "id": "identity_84193", "image": "GeoQA+/images/24.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the base area of the cone?" }, { "from": "gpt", "value": "The answer is 9π square units" } ], "clip_score": 33.03125, "loss": 2.127241849899292 }, { "id": "identity_125513", "image": "A-OKVQA/images/225919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the man's jacket?" }, { "from": "gpt", "value": "The answer is Navy blue" } ], "clip_score": 13.9453125, "loss": 3.890645980834961 }, { "id": "identity_290752", "image": "IconQA/images/44733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the pencil in inches when measured to the nearest inch using the ruler?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 39.15625, "loss": 2.685204267501831 }, { "id": "identity_201602", "image": "A-OKVQA/images/386592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand name is displayed in large letters at the top of the building in the background?" }, { "from": "gpt", "value": "The answer is CHAMPS" } ], "clip_score": 16.4375, "loss": 2.777916193008423 }, { "id": "identity_280464", "image": "PMC-VQA/images/PMC7346254_fig1_208834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the micronodules located? \n(A) right upper lobe \n(B) left lower lobe \n(C) left upper lobe \n(D) right lower lobe" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.78125, "loss": 3.951115131378174 }, { "id": "identity_133338", "image": "A-OKVQA/images/364075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people in the background of the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.21875, "loss": 3.534655809402466 }, { "id": "identity_118265", "image": "VQA2.0/images/395396.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any street signs visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.3125, "loss": 3.1384270191192627 }, { "id": "identity_111857", "image": "VQA2.0/images/356834.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many letters are missing in the signboard?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 17.421875, "loss": 2.2438018321990967 }, { "id": "identity_231320", "image": "Geometry3K/images/972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the area of the parallelogram and then determine the length of the diagonal using the Pythagorean theorem. Round to the nearest tenth if necessary" }, { "from": "gpt", "value": "The answer is 12.2 inches" } ], "clip_score": 35.40625, "loss": 2.0603086948394775 }, { "id": "identity_244521", "image": "VQA-AS/images/18593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different objects are present in the image, and what are they?" }, { "from": "gpt", "value": "The answer is seven objects: hamburger, older girl, younger girl, cup, cat, tree, sun, flower" } ], "clip_score": 25.359375, "loss": 2.781714916229248 }, { "id": "identity_281067", "image": "TabMWP/images/2342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the past 4 days, Alvin kept track of the number of laps he ran. On which day did Alvin run the least number of laps?\nChoices:\n(A) Sunday\n(B) Monday\n(C) Tuesday\n(D) Wednesday" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.21875, "loss": 3.848771572113037 }, { "id": "identity_90767", "image": "Geometry3K/images/547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the diagonals of parallelogram LKMJ intersect at point J, and the length of diagonal LK is 10 units, what is the length of segment LJ?" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 37.9375, "loss": 1.8565845489501953 }, { "id": "identity_243986", "image": "A-OKVQA/images/574215.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main ingredients visible on this pizza, and how do they indicate the type of sauce used?" }, { "from": "gpt", "value": "The answer is Cheese, white sauce, and herbs" } ], "clip_score": 30.65625, "loss": 2.810786724090576 }, { "id": "identity_90831", "image": "UniGeo/images/506.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the shaded sector is rolled to form a cone, what is the volume of the cone?" }, { "from": "gpt", "value": "The answer is 1/3π(2/π)^2√(4^2 - (2/π)^2)" } ], "clip_score": 29.71875, "loss": 1.841059684753418 }, { "id": "identity_270859", "image": "TextVQA/images/8309f77b504ef446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what this says?" }, { "from": "gpt", "value": "The answer is happry" } ], "clip_score": 16.390625, "loss": 4.247371673583984 }, { "id": "identity_159570", "image": "Geometry3K/images/1632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sine of the angle opposite the side with length 20?" }, { "from": "gpt", "value": "The answer is 22 / sqrt(884)" } ], "clip_score": 34.53125, "loss": 1.8903688192367554 }, { "id": "identity_153801", "image": "ScienceQA/images/14289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing accessory is depicted in the image?" }, { "from": "gpt", "value": "The answer is Necktie" } ], "clip_score": 33.90625, "loss": 3.021822452545166 }, { "id": "identity_88264", "image": "Geometry3K/images/396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is 54" } ], "clip_score": 37.5, "loss": 2.2764461040496826 }, { "id": "identity_205986", "image": "A-OKVQA/images/479213.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tray holding the cake?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 19.75, "loss": 3.4055237770080566 }, { "id": "identity_31334", "image": "VizWiz/images/VizWiz_train_00017646.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label say on this bottle? Thank you." }, { "from": "gpt", "value": "The answer is peroxyl colgate" } ], "clip_score": 36.4375, "loss": 4.023387908935547 }, { "id": "identity_31405", "image": "DocVQA/images/hjmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the value of social security charge mentioned in the given page ?" }, { "from": "gpt", "value": "The answer is 21.34" } ], "clip_score": 23.125, "loss": 2.452749252319336 }, { "id": "identity_48284", "image": "FigureQA/images/45633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Maroon section in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 33%" } ], "clip_score": 38.25, "loss": 2.228581190109253 }, { "id": "identity_191134", "image": "VQA2.0/images/306866.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum parking duration allowed at this location?" }, { "from": "gpt", "value": "The answer is 48 hours" } ], "clip_score": 22.59375, "loss": 2.315154552459717 }, { "id": "identity_324137", "image": "A-OKVQA/images/308900.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In what type of setting is this place located?\nChoices:\n(A) suburban\n(B) desert\n(C) urban\n(D) rural" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.953125, "loss": 4.5485944747924805 }, { "id": "identity_23313", "image": "TextVQA/images/f13e48de16a839e0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what basketball association is displayed on these jerseys?" }, { "from": "gpt", "value": "The answer is nba" } ], "clip_score": 19.53125, "loss": 2.433656930923462 }, { "id": "identity_59096", "image": "FigureQA/images/42846.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the largest section in the pie chart?" }, { "from": "gpt", "value": "The answer is Dark Orange" } ], "clip_score": 37.5, "loss": 3.2526330947875977 }, { "id": "identity_118947", "image": "Super-CLEVR/images/superCLEVR_new_001785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are depicted in the image?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 24.78125, "loss": 2.6181068420410156 }, { "id": "identity_43118", "image": "FigureQA/images/65190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Medium Seafoam ever exceed a value of 90 on the y-axis?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.59375, "loss": 2.8806216716766357 }, { "id": "identity_272480", "image": "TabMWP/images/10928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After winning the student body president election at Salem Middle School, Austen counted the number of votes he received from each grade. What fraction of his total votes came from the seventh grade? Please simplify your answer." }, { "from": "gpt", "value": "The answer is 23/44" } ], "clip_score": 35.96875, "loss": 2.7699227333068848 }, { "id": "identity_184666", "image": "PMC-VQA/images/PMC6636588_f2-cln_74p1_10112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the circled lesion located in the frontal lobe?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.46875, "loss": 3.1482768058776855 }, { "id": "identity_28121", "image": "IconQA/images/1642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many circles are there?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 28.875, "loss": 2.46366286277771 }, { "id": "identity_212215", "image": "VQA2.0/images/489107.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the traffic cone next to the person in the high visibility vest?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 22.140625, "loss": 3.1602530479431152 }, { "id": "identity_125470", "image": "TextVQA/images/ff0ef7dc6d86e204.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is sponsoring the event according to the board?" }, { "from": "gpt", "value": "The answer is Gillette" } ], "clip_score": 16.015625, "loss": 2.7525508403778076 }, { "id": "identity_90955", "image": "Geometry3K/images/1759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 4?" }, { "from": "gpt", "value": "The answer is 124°" } ], "clip_score": 34.21875, "loss": 2.3527774810791016 }, { "id": "identity_54371", "image": "FigureQA/images/48728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of the Dark Gold section in the pie chart?" }, { "from": "gpt", "value": "The answer is 55.6%" } ], "clip_score": 37.1875, "loss": 1.829509973526001 }, { "id": "identity_241846", "image": "TQA/images/question_images/parts_worm_7311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the functions of the anterior and posterior suckers in the animal shown in the picture?" }, { "from": "gpt", "value": "The answer is Attachment, feeding, locomotion" } ], "clip_score": 28.109375, "loss": 2.8374979496002197 }, { "id": "identity_209409", "image": "VQA-AS/images/10716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tables are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.359375, "loss": 2.49960994720459 }, { "id": "identity_70594", "image": "ScienceQA/images/11814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many continents are visible in the image?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 30.640625, "loss": 4.475855827331543 }, { "id": "identity_5351", "image": "PMC-VQA/images/PMC6322781_pone.0209674.g008_419942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sections of the abdomen are shown in the images? \nChoices:\n(A) sagittal, transaxial, and oblique \n(B) coronal, transaxial, and axial \n(C) sagittal, transaxial, and coronal \n(D) oblique, coronal, and axial " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5, "loss": 3.6798133850097656 }, { "id": "identity_125415", "image": "A-OKVQA/images/286376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many complete skateboards are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.15625, "loss": 4.371509552001953 }, { "id": "identity_217536", "image": "PMC-VQA/images/PMC8172786_F3_458463.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure labeled as \"C\" in the image?" }, { "from": "gpt", "value": "The answer is Sacroiliac Joint (SIJ)" } ], "clip_score": 25.65625, "loss": 2.0456173419952393 }, { "id": "identity_216232", "image": "A-OKVQA/images/501571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the horses that the mounted persons are riding?" }, { "from": "gpt", "value": "The answer is Two black and two brown horses" } ], "clip_score": 27.9375, "loss": 2.946234941482544 }, { "id": "identity_101002", "image": "CLEVR-Math/images/CLEVR_train_028705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are cubes?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 26.078125, "loss": 2.9605636596679688 }, { "id": "identity_181924", "image": "CLEVR-Math/images/CLEVR_train_028445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are both small and brown?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.421875, "loss": 2.5354719161987305 }, { "id": "identity_72890", "image": "ScienceQA/images/11484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trees are predominantly seen in the park area of the image?" }, { "from": "gpt", "value": "The answer is Deciduous" } ], "clip_score": 14.6796875, "loss": 2.9672691822052 }, { "id": "identity_177243", "image": "TabMWP/images/13848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes less did Dalton spend on homework on Friday compared to Monday?" }, { "from": "gpt", "value": "The answer is 7 minutes" } ], "clip_score": 38.625, "loss": 1.9948347806930542 }, { "id": "identity_42524", "image": "FigureQA/images/3824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value for Dark Slate greater than the value for Light Sky Blue?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.34375, "loss": 3.7058746814727783 }, { "id": "identity_282742", "image": "Super-CLEVR/images/superCLEVR_new_001217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any matte buses that are the same size as the red rubber object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 22.796875, "loss": 5.386107444763184 }, { "id": "identity_283076", "image": "PMC-VQA/images/PMC3395576_F4_144858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is contained in the compound glands?\nChoices:\n(A) Cytoplasm of mucosal epithelium \n(B) Cytoplasm of columnar cells \n(C) Cytoplasm of both \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.09375, "loss": 4.189867973327637 }, { "id": "identity_132975", "image": "VQA2.0/images/580919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any buildings visible in the background?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.578125, "loss": 3.2952704429626465 }, { "id": "identity_60678", "image": "DocVQA/images/xkcc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Samuel J. Fomon become a member of the American Board of Nutrition?" }, { "from": "gpt", "value": "The answer is 1968" } ], "clip_score": 26.03125, "loss": 1.8400883674621582 }, { "id": "identity_137277", "image": "ChartQA/images/two_col_5386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between male and female patients?" }, { "from": "gpt", "value": "The answer is 2.8%" } ], "clip_score": 28.40625, "loss": 1.8853726387023926 }, { "id": "identity_129371", "image": "PMC-VQA/images/PMC8537352_insects-12-00850-f002_92119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the leaf surface surrounding the gall?" }, { "from": "gpt", "value": "The answer is Wrinkled with hair-like structures" } ], "clip_score": 26.46875, "loss": 2.7460010051727295 }, { "id": "identity_323839", "image": "IconQA/images/28811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of this is pink?\nChoices:\n(A) 1/5\n(B) 1/4\n(C) 1/2\n(D) 1/3" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.65625, "loss": 3.357382297515869 }, { "id": "identity_102911", "image": "CLEVR-Math/images/CLEVR_train_043427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither cubes nor cylinders?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.4375, "loss": 2.4252805709838867 }, { "id": "identity_181375", "image": "IconQA/images/93047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green buckets are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.59375, "loss": 2.380106210708618 }, { "id": "identity_77676", "image": "TQA/images/abc_question_images/nuclear_energy_18110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letters represent the two smaller nuclei produced after the fission at point 'X'?" }, { "from": "gpt", "value": "The answer is The letters 'K' and 'U' represent the two smaller nuclei" } ], "clip_score": 31.5, "loss": 1.6583796739578247 }, { "id": "identity_171096", "image": "IconQA/images/25811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the rectangles identical in size?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.40625, "loss": 3.248547077178955 }, { "id": "identity_144864", "image": "DVQA/images/bar_train_00088906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value of 'rage' across all categories?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 21.15625, "loss": 2.4278695583343506 }, { "id": "identity_240458", "image": "GeoQA+/images/4986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 在△ABC中,D、E分别是BC、AC中点,BF平分∠ABC,交DE于点F。已知AB=8,BC=6,求△BFD的面积。" }, { "from": "gpt", "value": "The answer is \\(\\frac{36}{7}\\)" } ], "clip_score": 30.03125, "loss": 2.4693520069122314 }, { "id": "identity_2817", "image": "DocVQA/images/qxnl0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what time is the meeting Adjourned?" }, { "from": "gpt", "value": "The answer is 17:30" } ], "clip_score": 20.984375, "loss": 1.457674264907837 }, { "id": "identity_266565", "image": "TabMWP/images/16575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum number of rotten tomatoes recorded in each barrel received by the Oakland Soup Company?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 16.296875, "loss": 2.5500969886779785 }, { "id": "identity_7000", "image": "FigureQA/images/45383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green the maximum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.21875, "loss": 4.334036827087402 }, { "id": "identity_204106", "image": "PMC-VQA/images/PMC6327415_Fig13_420938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text or labeling present in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is text and labeling" } ], "clip_score": 15.34375, "loss": 2.456876039505005 }, { "id": "identity_201020", "image": "A-OKVQA/images/448694.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many spectators can be seen in the background?" }, { "from": "gpt", "value": "The answer is More than twenty" } ], "clip_score": 15.1875, "loss": 3.4407060146331787 }, { "id": "identity_265775", "image": "ScienceQA/images/17700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repelling effect on each other?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.109375, "loss": 4.066391944885254 }, { "id": "identity_109108", "image": "PMC-VQA/images/PMC3973855_F1_278310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely location of the abnormality within the body?" }, { "from": "gpt", "value": "The answer is Mediastinal area of the chest" } ], "clip_score": 32.78125, "loss": 3.1826181411743164 }, { "id": "identity_283655", "image": "A-OKVQA/images/366179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which types of mass transit units are parked here? Are they buses, trains, cabs, or cars?\nChoices:\n(A) busses\n(B) trains\n(C) cabs\n(D) cars" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.125, "loss": 3.969648838043213 }, { "id": "identity_227316", "image": "ChartQA/images/multi_col_40268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total change in average daily rate in Canadian dollars for the green line from Q1 to Q4 in 2016, and what is the average rate for the same period?" }, { "from": "gpt", "value": "The answer is Total change is 14, average rate is 187" } ], "clip_score": 24.78125, "loss": 2.4359731674194336 }, { "id": "identity_81164", "image": "UniGeo/images/1548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AD is parallel to BE and CF, and lines a and b intersect these parallel lines at points A, B, C and D, E, F respectively, and the length of segment BC is twice the length of segment AB, what is the length of segment AB?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.6875, "loss": 2.8326375484466553 }, { "id": "identity_67003", "image": "TQA/images/abc_question_images/nuclear_energy_18117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'Y' indicate in the context of the power plant?" }, { "from": "gpt", "value": "The answer is Cooling water source or reservoir" } ], "clip_score": 33.84375, "loss": 3.0008013248443604 }, { "id": "identity_98677", "image": "TabMWP/images/1044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which teacher has the largest class size?" }, { "from": "gpt", "value": "The answer is Mr. Watson" } ], "clip_score": 34.25, "loss": 2.2466065883636475 }, { "id": "identity_60268", "image": "DVQA/images/bar_train_00025906.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many categories are represented in the graph?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.296875, "loss": 2.5351946353912354 }, { "id": "identity_293566", "image": "TabMWP/images/19383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During his practice sessions, a baseball player kept track of the number of baseballs he hit each day. On which day did he hit the highest number of baseballs?\nChoices:\n(A) Monday\n(B) Tuesday\n(C) Wednesday\n(D) Thursday" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.125, "loss": 3.444526195526123 }, { "id": "identity_269005", "image": "DocVQA/images/nrmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the individuals known as the Architects?" }, { "from": "gpt", "value": "The answer is MURPHY AND MACKEY" } ], "clip_score": 18.984375, "loss": 2.160919427871704 }, { "id": "identity_16802", "image": "FigureQA/images/77642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Firebrick have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.03125, "loss": 3.895827531814575 }, { "id": "identity_151430", "image": "ScienceQA/images/3271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is bordered by four Great Lakes?" }, { "from": "gpt", "value": "The answer is Michigan" } ], "clip_score": 22.671875, "loss": 2.7620482444763184 }, { "id": "identity_330072", "image": "UniGeo/images/479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的弦,AB=10,点C是⊙O上的一个点,且∠C=45°。若点M、N分别是弦AB、AC的中点,则MN的最大值是()\n选项:\n(A) 5√{2}\n(B) 5\n(C) √{5}\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.375, "loss": 2.472130537033081 }, { "id": "identity_58677", "image": "FigureQA/images/22286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest segment in the pie chart?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 33.0625, "loss": 3.0646262168884277 }, { "id": "identity_126003", "image": "PMC-VQA/images/PMC6879599_F5_74476.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible signs of surgical intervention in the brain image?" }, { "from": "gpt", "value": "The answer is No visible signs of surgical intervention" } ], "clip_score": 28.578125, "loss": 2.390817880630493 }, { "id": "identity_223089", "image": "VQA2.0/images/540097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many condiment bottles are on the table closest to the camera?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.21875, "loss": 2.2723262310028076 }, { "id": "identity_309001", "image": "UniGeo/images/364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,CD是⊙O的弦,∠ACD=40°,则∠BAD是多少?\n选项:\n(A) 40°\n(B) 50°\n(C) 60°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 2.460266590118408 }, { "id": "identity_247140", "image": "TabMWP/images/32855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Layla need to buy a motorcycle, a dirt bike, and a snowmobile?" }, { "from": "gpt", "value": "The answer is 1,941" } ], "clip_score": 33.5, "loss": 2.5830206871032715 }, { "id": "identity_242332", "image": "TabMWP/images/31715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Edward need to buy two pieces of elegant stationery, one piece of floral stationery, and one purple envelope?" }, { "from": "gpt", "value": "The answer is 0.20" } ], "clip_score": 30.40625, "loss": 2.2141482830047607 }, { "id": "identity_312103", "image": "UniGeo/images/193.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,∠1=62°,若m∥n,则∠2的度数是多少?\n选项:\n(A) 118°\n(B) 28°\n(C) 62°\n(D) 38°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0, "loss": 3.2271041870117188 }, { "id": "identity_70402", "image": "ScienceQA/images/3191.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly west of Virginia?" }, { "from": "gpt", "value": "The answer is West Virginia" } ], "clip_score": 26.453125, "loss": 2.3534812927246094 }, { "id": "identity_252598", "image": "CLEVR-Math/images/CLEVR_train_028445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 more small metal cubes and 1 more big metal cube, how many objects will exist in total?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 31.265625, "loss": 1.9505704641342163 }, { "id": "identity_219926", "image": "A-OKVQA/images/148395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the green car in a showroom or outdoor setting?" }, { "from": "gpt", "value": "The answer is Showroom" } ], "clip_score": 24.921875, "loss": 2.5244057178497314 }, { "id": "identity_62093", "image": "DocVQA/images/zjkm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the paperback version mentioned in the memorandum?" }, { "from": "gpt", "value": "The answer is $15.95" } ], "clip_score": 19.125, "loss": 2.144423723220825 }, { "id": "identity_234649", "image": "TQA/images/question_images/seasons_172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many seasons are there in both the Southern and Northern Hemispheres combined, as depicted in the diagram?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 33.1875, "loss": 2.842486619949341 }, { "id": "identity_53565", "image": "MapQA/images/map_7710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range of farms in the state that borders the northern side of California?" }, { "from": "gpt", "value": "The answer is 10.1%-10.7%" } ], "clip_score": 36.625, "loss": 1.4113937616348267 }, { "id": "identity_35062", "image": "DocVQA/images/kzkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the date at bottom of the page?" }, { "from": "gpt", "value": "The answer is 15 September 1963" } ], "clip_score": 13.078125, "loss": 1.9910132884979248 }, { "id": "identity_39050", "image": "DVQA/images/bar_train_00168561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the lowest total number of units sold across all stores?" }, { "from": "gpt", "value": "The answer is staff" } ], "clip_score": 28.53125, "loss": 4.110965728759766 }, { "id": "identity_329772", "image": "TextVQA/images/b68cfbe6cbc14443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this going to take place?" }, { "from": "gpt", "value": "The answer is wales" } ], "clip_score": 9.09375, "loss": 3.812227487564087 }, { "id": "identity_6132", "image": "TextVQA/images/baa540d780fc1d68.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the sign?" }, { "from": "gpt", "value": "The answer is wells fargo" } ], "clip_score": 32.6875, "loss": 1.8899611234664917 }, { "id": "identity_181264", "image": "TabMWP/images/22166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more money was donated to Medical research than to Parks in June?" }, { "from": "gpt", "value": "The answer is $11" } ], "clip_score": 33.40625, "loss": 2.1573054790496826 }, { "id": "identity_113700", "image": "PMC-VQA/images/PMC9221261_biomolecules-12-00820-f007_318906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time stamp shown in the top right corner of the image?" }, { "from": "gpt", "value": "The answer is 00:00:00 hrs" } ], "clip_score": 18.203125, "loss": 1.3534680604934692 }, { "id": "identity_337370", "image": "TextVQA/images/75abd89790812733.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which fruit company is on this box?" }, { "from": "gpt", "value": "The answer is dole" } ], "clip_score": 13.5390625, "loss": 3.1530253887176514 }, { "id": "identity_81449", "image": "Geometry3K/images/935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the triangle?" }, { "from": "gpt", "value": "The answer is √89 or approximately 9.43" } ], "clip_score": 34.9375, "loss": 2.323516368865967 }, { "id": "identity_69338", "image": "TQA/images/abc_question_images/human_system_digestive_16089.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label X refer to in the image?" }, { "from": "gpt", "value": "The answer is Large intestine" } ], "clip_score": 27.453125, "loss": 2.1436119079589844 }, { "id": "identity_226920", "image": "ScienceQA/images/4551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three characteristics of the material this box is made of, and how do these characteristics make it suitable for packaging?" }, { "from": "gpt", "value": "The answer is Lightweight, durability, recyclability" } ], "clip_score": 27.5625, "loss": 3.0340561866760254 }, { "id": "identity_112811", "image": "PMC-VQA/images/PMC7770766_Fig2_331531.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any spherical particles visible in the image?" }, { "from": "gpt", "value": "The answer is Yes or No, depending on the presence of such shapes" } ], "clip_score": 19.890625, "loss": 2.837700366973877 }, { "id": "identity_319292", "image": "GeoQA+/images/2195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图, 圆O的直径AB为4, 点C在圆O上, ∠ACB的平分线交圆O于点D, 连接AD、BD, 则AD的长等于多少?\n\n选项:\n(A) 2\n(B) 3\n(C) 2√{2}\n(D) 2√{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.375, "loss": 2.2505691051483154 }, { "id": "identity_168847", "image": "TabMWP/images/26652.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which month did Michael spend the least amount of time babysitting?" }, { "from": "gpt", "value": "The answer is March and May" } ], "clip_score": 30.59375, "loss": 3.4740586280822754 }, { "id": "identity_253924", "image": "UniGeo/images/1835.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A在点O的北偏东60°的方向上,点B在点O的南偏东40°的方向上,若点C在点O的北偏西30°的方向上,则∠AOC与∠BOC的度数和为多少?" }, { "from": "gpt", "value": "The answer is 200°" } ], "clip_score": 33.75, "loss": 2.074993371963501 }, { "id": "identity_266099", "image": "GeoQA+/images/316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, point D lies on the extension of diameter AB, and line DC is tangent to circle O at point G. If angle A is 35°, what is the measure of angle D?\nChoices:\n(A) 20°\n(B) 30°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.5625, "loss": 2.288306951522827 }, { "id": "identity_329920", "image": "Super-CLEVR/images/superCLEVR_new_000778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are to the left of this purple object?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 28.796875, "loss": 3.0007903575897217 }, { "id": "identity_261101", "image": "DocVQA/images/hllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two main sections of the document, and what is the purpose of each section?" }, { "from": "gpt", "value": "The answer is METHOD OF ADVERTISING\" and \"ABSENCE OF ADVERTISING\"; methods and justifications" } ], "clip_score": 20.109375, "loss": 2.3135998249053955 }, { "id": "identity_304658", "image": "FigureQA/images/32872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.59375, "loss": 3.9464824199676514 }, { "id": "identity_53841", "image": "FigureQA/images/56770.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the Orchid bar?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 21.84375, "loss": 2.6217634677886963 }, { "id": "identity_136000", "image": "DVQA/images/bar_train_00012229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold per item?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 21.625, "loss": 2.762436628341675 }, { "id": "identity_93812", "image": "IconQA/images/60208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents the side of a square, what would be the perimeter of that square?" }, { "from": "gpt", "value": "The answer is 21.6 cm" } ], "clip_score": 28.015625, "loss": 2.3354735374450684 }, { "id": "identity_87335", "image": "UniGeo/images/3326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AO is parallel to DC and ∠AOD = 70°, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 35°" } ], "clip_score": 32.09375, "loss": 1.9189226627349854 }, { "id": "identity_82352", "image": "UniGeo/images/2922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠A = 40°, what is the length of segment OB, assuming the radius of the circle is r?" }, { "from": "gpt", "value": "The answer is r" } ], "clip_score": 34.53125, "loss": 3.453329563140869 }, { "id": "identity_179753", "image": "TabMWP/images/28701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days did Mitch drive more than 90 miles?" }, { "from": "gpt", "value": "The answer is 5 days" } ], "clip_score": 32.75, "loss": 2.191135883331299 }, { "id": "identity_20368", "image": "PMC-VQA/images/PMC7603721_Fig2_288945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of medical imaging was performed at admission? \nChoices:\n(A) CT scan \n(B) MRI \n(C) X-ray \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.890625, "loss": 4.148720741271973 }, { "id": "identity_32223", "image": "PMC-VQA/images/PMC7652921_Fig2_294443.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the topic of the image? \nChoices:\n(A) Dogs \n(B) Cats \n(C) Hemorrhage \n(D) Trees " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.390625, "loss": 4.644782066345215 }, { "id": "identity_94356", "image": "IconQA/images/52701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If three birds fly away, how many birds will remain?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 27.4375, "loss": 1.9999319314956665 }, { "id": "identity_144824", "image": "DVQA/images/bar_train_00125842.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of people who prefer 'extent' across all preferences?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.84375, "loss": 2.44189715385437 }, { "id": "identity_35832", "image": "PMC-VQA/images/PMC7324723_F3_201814.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI imaging was performed? \nChoices:\n(A) T1 weighted imaging \n(B) Diffusion weighted imaging \n(C) Axial FLAIR imaging \n(D) Sagittal T2 weighted imaging " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.609375, "loss": 4.0250654220581055 }, { "id": "identity_179597", "image": "TabMWP/images/14492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Tanner wants to buy 3 birthday balloons and 2 kazoos, how much will he spend?" }, { "from": "gpt", "value": "The answer is $7.05" } ], "clip_score": 26.8125, "loss": 2.1242198944091797 }, { "id": "identity_115703", "image": "PMC-VQA/images/PMC4711110_Fig1_461730.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible abnormalities in the hip joints?" }, { "from": "gpt", "value": "The answer is No visible abnormalities" } ], "clip_score": 28.84375, "loss": 3.018984317779541 }, { "id": "identity_239104", "image": "TQA/images/abc_question_images/parts_microscope_17198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter indicates the part of the microscope that adjusts the focus and is located near the lenses that adjust viewing?" }, { "from": "gpt", "value": "The answer is W" } ], "clip_score": 34.40625, "loss": 3.4674696922302246 }, { "id": "identity_275563", "image": "VQA-RAD/images/synpic25587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show a deviation in the trachea?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.734375, "loss": 3.1062428951263428 }, { "id": "identity_144373", "image": "DocVQA/images/xrcy0227_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount billed for secretarial assistance?" }, { "from": "gpt", "value": "The answer is $140" } ], "clip_score": 23.921875, "loss": 2.4971253871917725 }, { "id": "identity_81733", "image": "Geometry3K/images/729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If DE = 60 and FC = 16, find the length of segment AC" }, { "from": "gpt", "value": "The answer is 62" } ], "clip_score": 30.9375, "loss": 2.9147660732269287 }, { "id": "identity_149820", "image": "DocVQA/images/rrcw0217_20.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area code of the telephone number mentioned in the document?" }, { "from": "gpt", "value": "The answer is 609" } ], "clip_score": 25.03125, "loss": 2.119091272354126 }, { "id": "identity_156443", "image": "ScienceQA/images/8588.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of chemical bonds are connecting the sulfur atoms in the compound?" }, { "from": "gpt", "value": "The answer is Covalent bonds" } ], "clip_score": 33.75, "loss": 1.7872157096862793 }, { "id": "identity_44401", "image": "FigureQA/images/84788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Dark Green at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 29.09375, "loss": 2.5577473640441895 }, { "id": "identity_254824", "image": "VQA-AS/images/18891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the safety rules depicted in the playground, and how do they ensure the safety of children?" }, { "from": "gpt", "value": "The answer is Rules prevent accidents and ensure safe play for children" } ], "clip_score": 25.171875, "loss": 3.4832239151000977 }, { "id": "identity_117526", "image": "VizWiz/images/VizWiz_train_00016351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any non-plant elements visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, there are rocks and possibly a human finger" } ], "clip_score": 20.71875, "loss": 3.0138630867004395 }, { "id": "identity_88912", "image": "Geometry3K/images/1118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle opposite to the 4 m side in the left triangle?" }, { "from": "gpt", "value": "The answer is 44°" } ], "clip_score": 35.25, "loss": 2.274226665496826 }, { "id": "identity_2669", "image": "DVQA/images/bar_train_00037100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second bar from the left in each group?" }, { "from": "gpt", "value": "The answer is code" } ], "clip_score": 17.953125, "loss": 4.171520709991455 }, { "id": "identity_160013", "image": "GeoQA+/images/5484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠BCD = 100°, what is the measure of ∠BOC?" }, { "from": "gpt", "value": "The answer is 200°" } ], "clip_score": 32.15625, "loss": 1.4779921770095825 }, { "id": "identity_334769", "image": "GeoQA+/images/1731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AD∥BE∥CF,直线a、b与这三条平行线分别交于点A、B、C和点D、E、F,AB=4,BC=6,DE=3,则EF的长度是()\n选项:\n(A) 4\n(B) 5\n(C) 6\n(D) 4.5" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.1875, "loss": 2.593681812286377 }, { "id": "identity_175005", "image": "TabMWP/images/1927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings have less than 40 bricks?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 20.71875, "loss": 2.0957651138305664 }, { "id": "identity_82129", "image": "UniGeo/images/3032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 34.1875, "loss": 1.8536179065704346 }, { "id": "identity_100874", "image": "TabMWP/images/1959.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total time taken by the tour bus to complete one full round starting from the university campus?" }, { "from": "gpt", "value": "The answer is 7 hours and 45 minutes" } ], "clip_score": 33.6875, "loss": 1.4446030855178833 }, { "id": "identity_98921", "image": "IconQA/images/23134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertices does this shape have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.671875, "loss": 2.111215829849243 }, { "id": "identity_198643", "image": "TextVQA/images/0ab462028eb0810e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the book located second from the left on the top shelf?" }, { "from": "gpt", "value": "The answer is The Hobbit" } ], "clip_score": 23.0, "loss": 2.941741466522217 }, { "id": "identity_112768", "image": "Super-CLEVR/images/superCLEVR_new_000803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the airplane in the center of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.0625, "loss": 3.5110816955566406 }, { "id": "identity_80892", "image": "UniGeo/images/1038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠C = 40°, what is the measure of ∠CDB?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 33.25, "loss": 1.7510976791381836 }, { "id": "identity_60473", "image": "FigureQA/images/84644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of Light Sky Blue dots greater than the number of Lawn Green dots?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.71875, "loss": 3.4458770751953125 }, { "id": "identity_48536", "image": "DocVQA/images/qpvg0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the recipient?" }, { "from": "gpt", "value": "The answer is Dr. Damaj" } ], "clip_score": 18.875, "loss": 4.40593147277832 }, { "id": "identity_267592", "image": "FigureQA/images/97219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes the smoothness of Burlywood? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.765625, "loss": 4.21542501449585 }, { "id": "identity_143548", "image": "DVQA/images/bar_train_00011672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item sold the most units?" }, { "from": "gpt", "value": "The answer is column" } ], "clip_score": 24.640625, "loss": 3.1549057960510254 }, { "id": "identity_272507", "image": "ScienceQA/images/19648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Utah?\nChoices:\n(A) Nampa\n(B) Denver\n(C) Provo\n(D) Salt Lake City" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.796875, "loss": 3.72312593460083 }, { "id": "identity_205073", "image": "TextVQA/images/3f75d69c241df1f4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of glassware is the beer on the left served in?" }, { "from": "gpt", "value": "The answer is Beer goblet or chalice" } ], "clip_score": 25.15625, "loss": 2.462435722351074 }, { "id": "identity_21489", "image": "UniGeo/images/2563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD是菱形,∠ABC=120°,AD=2,则对角线AC的长是()\nChoices:\n(A) 4\n(B) √{3}\n(C) 2√{5}\n(D) 2√{3}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.328125, "loss": 2.2291934490203857 }, { "id": "identity_289379", "image": "A-OKVQA/images/61498.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose are the tennis balls in the cart intended? \n(A) selling\n(B) training\n(C) playing\n(D) gifting" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.78125, "loss": 4.586374282836914 }, { "id": "identity_248580", "image": "A-OKVQA/images/499915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key visual elements in the image that indicate the event is a protest, and how do they collectively support this conclusion?" }, { "from": "gpt", "value": "The answer is Protest" } ], "clip_score": 23.1875, "loss": 5.415761470794678 }, { "id": "identity_175597", "image": "TabMWP/images/3024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the values in the y column are doubled, what will be the new value of y when x is 9?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 25.1875, "loss": 2.698512315750122 }, { "id": "identity_218574", "image": "TextVQA/images/86401c9659fc9f6b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the team's uniform on the near side of the net?" }, { "from": "gpt", "value": "The answer is Black and white" } ], "clip_score": 21.6875, "loss": 2.974400043487549 }, { "id": "identity_169798", "image": "IconQA/images/15987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue buckets are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 30.078125, "loss": 2.405158758163452 }, { "id": "identity_227313", "image": "ScienceQA/images/289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of Pennsylvania and then name the capital of the state directly to the east of Pennsylvania" }, { "from": "gpt", "value": "The answer is Trenton" } ], "clip_score": 28.65625, "loss": 3.2253201007843018 }, { "id": "identity_59251", "image": "DVQA/images/bar_train_00038964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the accuracy of the algorithm peace on the dataset face higher than on the dataset attack?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 35.625, "loss": 3.2079060077667236 }, { "id": "identity_7630", "image": "AI2D/images/3759.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Midriff also called as?\nChoices:\n(A) Pharynx\n(B) Diaphragm\n(C) Bronchi\n(D) Larynx" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.390625, "loss": 4.128990173339844 }, { "id": "identity_65721", "image": "AI2D/images/2880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the ear is responsible for converting sound waves into electrical signals?" }, { "from": "gpt", "value": "The answer is Inner Ear" } ], "clip_score": 31.625, "loss": 2.529400110244751 }, { "id": "identity_214092", "image": "VQA-AS/images/9862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What weather phenomenon is depicted in the sky?" }, { "from": "gpt", "value": "The answer is a cloud" } ], "clip_score": 20.59375, "loss": 4.192523956298828 }, { "id": "identity_305864", "image": "IconQA/images/4508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there an even or odd number of rectangles?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.6875, "loss": 3.685286045074463 }, { "id": "identity_290202", "image": "IconQA/images/36146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture is the fourth in the sequence, given that the first picture is a sub? Options are fish, sub, and turtle.\nChoices:\n(A) fish\n(B) sub\n(C) turtle" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.75, "loss": 4.407756805419922 }, { "id": "identity_337539", "image": "UniGeo/images/2674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 在一个平行四边形中,若一个角为30°,一条边上的高为8,则另一条边的长为()\nChoices:\n(A) 16\n(B) 8\n(C) 8√{2}\n(D) 8√{3}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.96875, "loss": 3.0551719665527344 }, { "id": "identity_173472", "image": "IconQA/images/40501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are all the shapes?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 35.21875, "loss": 2.2315897941589355 }, { "id": "identity_127552", "image": "PMC-VQA/images/PMC4937712_F1_49549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the enhancement seen at the location of the yellow arrow?" }, { "from": "gpt", "value": "The answer is It could indicate a pituitary disorder" } ], "clip_score": 32.0625, "loss": 3.249577283859253 }, { "id": "identity_279067", "image": "TabMWP/images/9802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in revenue between The Big Cheese's sales of cheese pizza and vegetarian pizza, as compiled by the food industry researcher?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 38.53125, "loss": 3.7705023288726807 }, { "id": "identity_107762", "image": "VQA-RAD/images/synpic19114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the diaphragm in this chest X-ray?" }, { "from": "gpt", "value": "The answer is Diaphragm is intact, no free air" } ], "clip_score": 34.625, "loss": 2.890369176864624 }, { "id": "identity_12577", "image": "AI2D/images/3603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is directly below the pharynx?\nChoices:\n(A) Diaphragm\n(B) Lung\n(C) Trachea\n(D) Larynx" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.625, "loss": 3.930314302444458 }, { "id": "identity_54300", "image": "FigureQA/images/86700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the line with the highest starting value on the y-axis?" }, { "from": "gpt", "value": "The answer is Light Seafoam" } ], "clip_score": 28.9375, "loss": 3.3906311988830566 }, { "id": "identity_165588", "image": "GeoQA+/images/1653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if the measure of angle APB is 40 degrees and the line segment CD passes through the center O of the circle, what is the measure of angle CDE?" }, { "from": "gpt", "value": "The answer is 20 degrees" } ], "clip_score": 32.90625, "loss": 1.755455493927002 }, { "id": "identity_172126", "image": "IconQA/images/26328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of purple sections to non-purple sections?" }, { "from": "gpt", "value": "The answer is 3:5" } ], "clip_score": 36.8125, "loss": 1.5142234563827515 }, { "id": "identity_315648", "image": "TQA/images/question_images/human_system_digestive_3937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is located between the esophagus and the duodenum?\nChoices:\n(A) Cecum\n(B) Stomach\n(C) Ascending colon\n(D) Spleen" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.9375, "loss": 4.155738353729248 }, { "id": "identity_180864", "image": "TabMWP/images/35176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cost of the five key chains?" }, { "from": "gpt", "value": "The answer is $0.73" } ], "clip_score": 33.4375, "loss": 1.7847459316253662 }, { "id": "identity_289365", "image": "VQA-AS/images/18950.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which food is currently being cooked on the grill?" }, { "from": "gpt", "value": "The answer is cheese" } ], "clip_score": 24.765625, "loss": 3.1106319427490234 }, { "id": "identity_103119", "image": "CLEVR-Math/images/CLEVR_train_060492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.84375, "loss": 2.4739797115325928 }, { "id": "identity_237926", "image": "GeoQA+/images/3997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若AB是⊙O的直径,CD是⊙O的弦,∠ABD=50°,求∠ACD的度数。" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 30.25, "loss": 1.9235128164291382 }, { "id": "identity_287289", "image": "DocVQA/images/yxph0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which individuals have signed the report?" }, { "from": "gpt", "value": "The answer is Chief Chemist" } ], "clip_score": 24.390625, "loss": 4.152843952178955 }, { "id": "identity_219136", "image": "VQA2.0/images/360238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pedestrians visible on the sidewalk in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.5, "loss": 3.1683313846588135 }, { "id": "identity_235", "image": "TabMWP/images/3931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Kylie has $92.00. Does she have enough to buy a hammock and a lawn chair?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.8125, "loss": 3.839675188064575 }, { "id": "identity_277712", "image": "UniGeo/images/1171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given figure, triangle ABC is inscribed in circle O, AB=BC, ∠ABC=120°, AD is the diameter of circle O, AD=10. What is the length of BD?\nChoices:\n(A) 8\n(B) 5\n(C) 8√{3}\n(D) 5√{3}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.90625, "loss": 2.5000500679016113 }, { "id": "identity_160076", "image": "Geometry3K/images/1689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the triangle?" }, { "from": "gpt", "value": "The answer is 37.5√3" } ], "clip_score": 32.71875, "loss": 2.2695913314819336 }, { "id": "identity_18100", "image": "PlotQA/images/89054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and the second highest percentage of unemployed males population in in Luxembourg ?" }, { "from": "gpt", "value": "The answer is 1.6599998474121103" } ], "clip_score": 21.046875, "loss": 1.858697533607483 }, { "id": "identity_286717", "image": "IconQA/images/10736.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Estimate the length of the line using paper clips. The line is approximately (_) paper clips long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.0625, "loss": 2.393965721130371 }, { "id": "identity_102910", "image": "CLEVR-Math/images/CLEVR_train_043427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.921875, "loss": 2.4656410217285156 }, { "id": "identity_251836", "image": "DocVQA/images/knmc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full date and location of the event mentioned in the document?" }, { "from": "gpt", "value": "The answer is December 4, 1967, Freeborn Hall, University of California, Davis" } ], "clip_score": 36.15625, "loss": 0.9856578707695007 }, { "id": "identity_333844", "image": "GeoQA+/images/2433.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在一个5×4的网格中,每个小正方形的边长都是1,△ABC的顶点都在这些小正方形的顶点上,则tan∠BAC的值为()\nChoices:\n(A) \\frac{4}{3}\n(B) 0.75\n(C) 0.6\n(D) 0.8" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.65625, "loss": 2.404050827026367 }, { "id": "identity_6590", "image": "GeoQA+/images/3518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,已知∠ABO=50°,则∠ACB的大小为()\nChoices:\n(A) 40°\n(B) 30°\n(C) 45°\n(D) 50°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.734375, "loss": 2.5739598274230957 }, { "id": "identity_135910", "image": "DVQA/images/bar_train_00198679.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference percentage between the objects 'couple' and 'half'?" }, { "from": "gpt", "value": "The answer is Approximately 20%" } ], "clip_score": 27.921875, "loss": 2.451587677001953 }, { "id": "identity_286056", "image": "VizWiz/images/VizWiz_train_00008597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which flavor of Marie Callender's dinner is this?" }, { "from": "gpt", "value": "The answer is beef n vegetables" } ], "clip_score": 30.59375, "loss": 3.3515124320983887 }, { "id": "identity_143199", "image": "PlotQA/images/54720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are represented in the graph?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.34375, "loss": 2.638624668121338 }, { "id": "identity_209317", "image": "VizWiz/images/VizWiz_train_00004552.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sticker on the fruit in the center?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.765625, "loss": 3.7117371559143066 }, { "id": "identity_213574", "image": "TextVQA/images/290989ff27a7ee75.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fruit is displayed in the section directly in front of the checkout counters?" }, { "from": "gpt", "value": "The answer is Bananas" } ], "clip_score": 29.140625, "loss": 2.808837890625 }, { "id": "identity_138080", "image": "DVQA/images/bar_train_00076207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined accuracy value for 'sort' and 'burden' datasets on 'clouds'?" }, { "from": "gpt", "value": "The answer is 12.5" } ], "clip_score": 32.5625, "loss": 2.0298690795898438 }, { "id": "identity_8310", "image": "TabMWP/images/33316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To figure out how many vacation days she had left to use, Kira looked over her old calendars to figure out how many days of vacation she had taken each year. According to the table, what was the rate of change between 2014 and 2015?" }, { "from": "gpt", "value": "The answer is -4" } ], "clip_score": 40.875, "loss": 3.5156376361846924 }, { "id": "identity_129841", "image": "VQA2.0/images/398442.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number visible on the cart in the background?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 17.109375, "loss": 2.550916910171509 }, { "id": "identity_21188", "image": "VQA2.0/images/329799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tent in the image?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 17.40625, "loss": 4.05338191986084 }, { "id": "identity_264398", "image": "VQA2.0/images/323249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus's front?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 23.3125, "loss": 3.953381061553955 }, { "id": "identity_68140", "image": "TQA/images/abc_question_images/parts_microscope_17197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the objective lenses?" }, { "from": "gpt", "value": "The answer is J" } ], "clip_score": 22.125, "loss": 3.540034770965576 }, { "id": "identity_125013", "image": "VQA2.0/images/338549.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles can be seen in the background?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 16.078125, "loss": 4.301419734954834 }, { "id": "identity_229547", "image": "VQA2.0/images/374135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full registration number on the plane's wing?" }, { "from": "gpt", "value": "The answer is NX90850" } ], "clip_score": 20.40625, "loss": 2.4087445735931396 }, { "id": "identity_5372", "image": "ScienceQA/images/13244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can Nick and Gwen trade to each get what they want?\nChoices:\n(A) Nick can trade his tomatoes for Gwen's broccoli.\n(B) Nick can trade his tomatoes for Gwen's carrots.\n(C) Gwen can trade her broccoli for Nick's oranges.\n(D) Gwen can trade her almonds for Nick's tomatoes." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.390625, "loss": 3.0585784912109375 }, { "id": "identity_319347", "image": "FigureQA/images/39837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.5078125, "loss": 3.8555800914764404 }, { "id": "identity_253604", "image": "TextVQA/images/1d46818a0c8a0158.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total price of all bottles with labels that have a blue color?" }, { "from": "gpt", "value": "The answer is 11.99" } ], "clip_score": 26.140625, "loss": 1.6237802505493164 }, { "id": "identity_4121", "image": "TabMWP/images/1692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Shannon conducted a blind taste test on some of her friends in order to determine if organic fruits tasted different than non-organic fruits. Each friend ate one type of fruit. What is the probability that a randomly selected friend tasted apples and preferred non-organic? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 4/15" } ], "clip_score": 29.078125, "loss": 1.898468017578125 }, { "id": "identity_276994", "image": "A-OKVQA/images/306431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of signs are being displayed? \nOptions:\n(A) brand\n(B) directional\n(C) regulatory\n(D) warning" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 4.612936973571777 }, { "id": "identity_226284", "image": "VQA2.0/images/115275.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many floor tiles are visible in the image, and what is the average number of tiles per row if the visible tiles are arranged in a rectangular grid?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 11.9921875, "loss": 3.703272819519043 }, { "id": "identity_121605", "image": "PMC-VQA/images/PMC4074105_pone-0100498-g001_301927.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the data points in graph A connected by a line?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.5625, "loss": 3.316011905670166 }, { "id": "identity_199346", "image": "PMC-VQA/images/PMC6728781_FIG1_34433.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of scan is shown in the image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 30.5, "loss": 3.153305768966675 }, { "id": "identity_13794", "image": "ScienceQA/images/6556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of New Jersey?\nChoices:\n(A) Newark\n(B) Hartford\n(C) Jersey City\n(D) Trenton" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.328125, "loss": 3.754314422607422 }, { "id": "identity_103717", "image": "VizWiz/images/VizWiz_train_00005817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the dish soap claimed to be environmentally friendly?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.65625, "loss": 3.1138267517089844 }, { "id": "identity_120662", "image": "Super-CLEVR/images/superCLEVR_new_001288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a vehicle in the image that is larger than the red regular bus?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.796875, "loss": 3.3614883422851562 }, { "id": "identity_140130", "image": "DVQA/images/bar_train_00040347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the values of 'guilt' and 'reply' in the 'smell' group?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 22.5, "loss": 2.7413854598999023 }, { "id": "identity_45768", "image": "FigureQA/images/40039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color represents the highest value on the graph?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 29.796875, "loss": 2.9068336486816406 }, { "id": "identity_149717", "image": "DocVQA/images/tllg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many main sections does the document have?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.21875, "loss": 4.460701942443848 }, { "id": "identity_68696", "image": "AI2D/images/1098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the leaf is responsible for attaching it to the stem?" }, { "from": "gpt", "value": "The answer is Petiole" } ], "clip_score": 32.46875, "loss": 1.9301177263259888 }, { "id": "identity_51541", "image": "ChartQA/images/multi_col_60022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of participants (boys and girls) in 2016/17?" }, { "from": "gpt", "value": "The answer is 147,823.5" } ], "clip_score": 25.375, "loss": 2.4429259300231934 }, { "id": "identity_314779", "image": "TQA/images/question_images/waves_8227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the distance from D to H measure?\nChoices:\n(A) Wavelength\n(B) Crest\n(C) Amplitude\n(D) Trough" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.96875, "loss": 4.249276638031006 }, { "id": "identity_226503", "image": "GeoQA+/images/2012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O直径,CD是⊙O的弦,如果∠BAD=56°,求∠BCD的大小。" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 32.46875, "loss": 2.0472023487091064 }, { "id": "identity_256074", "image": "PMC-VQA/images/PMC7082429_fig2_138370.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the CT scan report, what are the key features that indicate a Grade IV trauma, and how do these features differentiate it from Grade I, II, and III traumas?" }, { "from": "gpt", "value": "The answer is Grade IV trauma involves extensive laceration and major vascular involvement" } ], "clip_score": 26.5625, "loss": 3.0368564128875732 }, { "id": "identity_201666", "image": "Super-CLEVR/images/superCLEVR_new_001449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that has a similar shape to the cyan car but is not a car?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.640625, "loss": 3.0984530448913574 }, { "id": "identity_63474", "image": "AI2D/images/4276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of gas is extracted from the layer labeled as \"Tight sand/gas\"?" }, { "from": "gpt", "value": "The answer is Tight sand gas" } ], "clip_score": 33.5625, "loss": 2.5923428535461426 }, { "id": "identity_31889", "image": "IconQA/images/39508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the rectangle?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 29.40625, "loss": 2.3819353580474854 }, { "id": "identity_18789", "image": "UniGeo/images/3444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A、B、C三点在圆O上,AD平分∠BAC,∠DAC=35°,则∠BOC=()\nChoices:\n(A) 35°\n(B) 70°\n(C) 120°\n(D) 140°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.65625, "loss": 2.640941619873047 }, { "id": "identity_223975", "image": "VQA-AS/images/17987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many red objects are in the image, and what are they?" }, { "from": "gpt", "value": "The answer is four red objects: flames, shirt, two cups" } ], "clip_score": 26.875, "loss": 2.6664257049560547 }, { "id": "identity_300485", "image": "DocVQA/images/mtnn0081_34.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the % of no deal in a switch purchase for this ice cream?" }, { "from": "gpt", "value": "The answer is 20.0%" } ], "clip_score": 29.859375, "loss": 1.5516589879989624 }, { "id": "identity_59809", "image": "DocVQA/images/frdv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the General Foods Corporation where the meeting is scheduled?" }, { "from": "gpt", "value": "The answer is 555 South Broadway Tarrytown, New York" } ], "clip_score": 24.953125, "loss": 1.8649545907974243 }, { "id": "identity_303606", "image": "DVQA/images/bar_train_00161010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of this bar?" }, { "from": "gpt", "value": "The answer is stock" } ], "clip_score": 23.859375, "loss": 3.8241074085235596 }, { "id": "identity_255055", "image": "TabMWP/images/28700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each pond has 10 frogs, how many frogs are in 7 ponds?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 34.125, "loss": 1.6347625255584717 }, { "id": "identity_200452", "image": "TextVQA/images/0fd38fd0f9bf1bee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of fuel does the green locomotive use?" }, { "from": "gpt", "value": "The answer is Steam" } ], "clip_score": 24.546875, "loss": 2.546947479248047 }, { "id": "identity_163498", "image": "GeoQA+/images/3522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the circle O, if the measure of angle ABC is 51 degrees, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 51 degrees" } ], "clip_score": 34.09375, "loss": 1.655416488647461 }, { "id": "identity_114262", "image": "TextVQA/images/438617e13660aa6e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this whiskey distilled at a single distillery?" }, { "from": "gpt", "value": "The answer is Yes, it is a single distillery" } ], "clip_score": 24.859375, "loss": 2.287334680557251 }, { "id": "identity_41031", "image": "ChartQA/images/two_col_4985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the highest and lowest annual gross domestic product growth from 2000 to 2018?" }, { "from": "gpt", "value": "The answer is 9.8%" } ], "clip_score": 25.0625, "loss": 1.839832067489624 }, { "id": "identity_129922", "image": "TextVQA/images/d5e309d94f12c76e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural style is the clock tower built in?" }, { "from": "gpt", "value": "The answer is Gothic" } ], "clip_score": 30.109375, "loss": 2.5540828704833984 }, { "id": "identity_192477", "image": "PMC-VQA/images/PMC6476123_fig2_469077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible abnormalities in the vertebrae?" }, { "from": "gpt", "value": "The answer is Yes, abnormalities present" } ], "clip_score": 28.921875, "loss": 3.500624179840088 }, { "id": "identity_217783", "image": "VQA2.0/images/491739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of headgear is the person second from the left wearing?" }, { "from": "gpt", "value": "The answer is Cowboy hat" } ], "clip_score": 25.109375, "loss": 3.2123007774353027 }, { "id": "identity_86581", "image": "UniGeo/images/835.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle O, if angle A is 35 degrees and angle BPC is 78 degrees, what is the measure of angle BPD?" }, { "from": "gpt", "value": "The answer is 113 degrees" } ], "clip_score": 32.9375, "loss": 1.6964621543884277 }, { "id": "identity_60350", "image": "DocVQA/images/yynk0079_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the insured company?" }, { "from": "gpt", "value": "The answer is R.J. Reynolds Tobacco Co" } ], "clip_score": 23.40625, "loss": 2.8948285579681396 }, { "id": "identity_120219", "image": "Super-CLEVR/images/superCLEVR_new_001263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image that have the same color wheels?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.34375, "loss": 3.3436920642852783 }, { "id": "identity_97465", "image": "IconQA/images/14181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sides does each shape have?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 29.96875, "loss": 2.472773313522339 }, { "id": "identity_135531", "image": "DocVQA/images/lqgm0052_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of the break for poster presentations?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 21.15625, "loss": 2.2250075340270996 }, { "id": "identity_228916", "image": "DVQA/images/bar_train_00193755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of all the values in the chart?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 22.828125, "loss": 2.2889022827148438 }, { "id": "identity_288866", "image": "ScienceQA/images/17075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area?\nChoices:\n(A) Kiribati\n(B) the Marshall Islands\n(C) Nauru\n(D) Tuvalu" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.15625, "loss": 3.9213809967041016 }, { "id": "identity_231950", "image": "FigureQA/images/86041.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the second largest area under the curve, and what is the difference in area between the largest and the second largest areas?" }, { "from": "gpt", "value": "The answer is Medium Orchid, 500" } ], "clip_score": 23.03125, "loss": 2.7491090297698975 }, { "id": "identity_61655", "image": "DocVQA/images/ykdb0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any tissue labs with a negative response in all three organs?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.953125, "loss": 2.6979269981384277 }, { "id": "identity_288369", "image": "VizWiz/images/VizWiz_train_00002691.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could you please identify this box?" }, { "from": "gpt", "value": "The answer is saltines" } ], "clip_score": 23.265625, "loss": 3.721937894821167 }, { "id": "identity_316424", "image": "AI2D/images/3283.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the label here?\nChoices:\n(A) lysosome\n(B) golgi apparatus\n(C) ribosomes\n(D) peroxisome" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.859375, "loss": 4.257126808166504 }, { "id": "identity_205708", "image": "A-OKVQA/images/578945.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is taking place in the background?" }, { "from": "gpt", "value": "The answer is Street fair or outdoor market" } ], "clip_score": 20.75, "loss": 2.688292980194092 }, { "id": "identity_112388", "image": "TextVQA/images/5477f897677ff2b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the photographer who took the cover photo?" }, { "from": "gpt", "value": "The answer is Yasuhiro Ushimura" } ], "clip_score": 27.671875, "loss": 2.7344717979431152 }, { "id": "identity_144923", "image": "DVQA/images/bar_train_00192428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more units did the 'shame' store sell than the 'winter' store for the item 'object'?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.3125, "loss": 2.4309194087982178 }, { "id": "identity_165053", "image": "UniGeo/images/920.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB and CD are chords of circle O and AB is parallel to CD, what is the measure of angle BOC?" }, { "from": "gpt", "value": "The answer is 72°" } ], "clip_score": 37.25, "loss": 2.4266204833984375 }, { "id": "identity_65815", "image": "ScienceQA/images/8711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is each object made of?" }, { "from": "gpt", "value": "The answer is Ceramic, Ice, Glass" } ], "clip_score": 22.90625, "loss": 1.5313653945922852 }, { "id": "identity_280108", "image": "PMC-VQA/images/PMC7723371_f2_316772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the result of the MRI scan in the area of the left artery? \nChoices:\n(A) Hypersignal\n(B) Iso signal\n(C) Hyposignal\n(D) None of the above." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.921875, "loss": 4.113226890563965 }, { "id": "identity_112725", "image": "A-OKVQA/images/97427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many burners are visible on the stovetop?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.703125, "loss": 4.011898994445801 }, { "id": "identity_141027", "image": "DVQA/images/bar_train_00135510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold per item in 'good' stores?" }, { "from": "gpt", "value": "The answer is 6.5" } ], "clip_score": 30.65625, "loss": 2.1531219482421875 }, { "id": "identity_317259", "image": "Super-CLEVR/images/superCLEVR_new_001270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large yellow shiny object to the right of the small brown bicycle shaped like the small brown rubber object?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 21.34375, "loss": 5.5531325340271 }, { "id": "identity_171912", "image": "TabMWP/images/6710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Herman need to buy two cookie sheets and two large plates?" }, { "from": "gpt", "value": "The answer is $24" } ], "clip_score": 29.859375, "loss": 2.1867024898529053 }, { "id": "identity_114070", "image": "PMC-VQA/images/PMC7339413_Fig3_206162.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the mass located in the left or right side of the patient's body?" }, { "from": "gpt", "value": "The answer is Left side" } ], "clip_score": 27.734375, "loss": 3.9581592082977295 }, { "id": "identity_250269", "image": "VQA-AS/images/8801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many items are in the room excluding the coat rack?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.234375, "loss": 2.150132894515991 }, { "id": "identity_108141", "image": "TextVQA/images/fc3ca7ad572166dc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two dates mentioned on the posters?" }, { "from": "gpt", "value": "The answer is May 30 and October 7, 2010" } ], "clip_score": 19.140625, "loss": 1.688262939453125 }, { "id": "identity_309380", "image": "UniGeo/images/373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在⊙O中,OA垂直于BC,∠ADB=25°.那么∠AOC的度数是多少?\n选项:\n(A) 30°\n(B) 45°\n(C) 50°\n(D) 55°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.53125, "loss": 2.8582100868225098 }, { "id": "identity_283080", "image": "IconQA/images/60270.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the fraction of circles among the shapes?\nChoices:\n(A) 1/5\n(B) 5/11\n(C) 1/9\n(D) 7/11" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.65625, "loss": 3.7039108276367188 }, { "id": "identity_239653", "image": "IconQA/images/107163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the digits of the number shown?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 16.8125, "loss": 1.854777455329895 }, { "id": "identity_327020", "image": "PMC-VQA/images/PMC7780869_F2_334052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where was the ground glass density observed in Patient 2's CT scan?\nChoices:\n(A) Upper lobes\n(B) Middle lobes\n(C) Lower lobes\n(D) Entire lung." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.265625, "loss": 4.260047435760498 }, { "id": "identity_246894", "image": "ScienceQA/images/17931.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony is located directly to the north of the colony shown and what is its capital city?" }, { "from": "gpt", "value": "The answer is Virginia, Richmond" } ], "clip_score": 29.609375, "loss": 3.0469892024993896 }, { "id": "identity_131719", "image": "Super-CLEVR/images/superCLEVR_new_000760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the object in the front left of the image larger than the object in the back right?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.140625, "loss": 3.328187942504883 }, { "id": "identity_25680", "image": "VQA-AS/images/16398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did somebody leave the TV on?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.53125, "loss": 3.347698211669922 }, { "id": "identity_270013", "image": "PMC-VQA/images/PMC7080855_F1_137687.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being depicted in the image?\nChoices:\n(A) Lumbar puncture\n(B) CT scan\n(C) MRI scan\n(D) X-ray scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.140625, "loss": 4.64006233215332 }, { "id": "identity_277536", "image": "PMC-VQA/images/PMC6108967_Fig6_358725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On which side is pleural effusion visible? \nChoices:\n(A) Right\n(B) Left\n(C) Both sides\n(D) No pleural effusion" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 4.629117012023926 }, { "id": "identity_38715", "image": "A-OKVQA/images/360673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this country?\nChoices:\n(A) india\n(B) united states\n(C) china\n(D) italy" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 13.015625, "loss": 4.33905553817749 }, { "id": "identity_203445", "image": "TextVQA/images/018c95ebb9fece08.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different product names are visible in the image?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 18.109375, "loss": 2.17976975440979 }, { "id": "identity_226690", "image": "IconQA/images/34609.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides of all the shapes combined?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 27.90625, "loss": 1.6382572650909424 }, { "id": "identity_25422", "image": "TextVQA/images/063d10037d4ade2d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the store with the red sign sell?" }, { "from": "gpt", "value": "The answer is booze" } ], "clip_score": 17.390625, "loss": 2.4619333744049072 }, { "id": "identity_221173", "image": "VizWiz/images/VizWiz_train_00016154.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the surface the glass is placed on?" }, { "from": "gpt", "value": "The answer is Light color (white or beige)" } ], "clip_score": 26.4375, "loss": 2.948305130004883 }, { "id": "identity_138783", "image": "MapQA/images/map_1306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the number of adults in Illinois?" }, { "from": "gpt", "value": "The answer is 308494-377914" } ], "clip_score": 28.984375, "loss": 2.386329412460327 }, { "id": "identity_175163", "image": "TabMWP/images/19223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did Cody walk an even or odd number of total kilometers over the 5 days?" }, { "from": "gpt", "value": "The answer is Odd" } ], "clip_score": 25.421875, "loss": 2.045542001724243 }, { "id": "identity_115709", "image": "PMC-VQA/images/PMC5380283_F3_149141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 33.28125, "loss": 2.3405659198760986 }, { "id": "identity_266973", "image": "ChartQA/images/multi_col_40519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest urban population value between males and females?" }, { "from": "gpt", "value": "The answer is 195.5" } ], "clip_score": 23.609375, "loss": 2.2085227966308594 }, { "id": "identity_243106", "image": "TextVQA/images/d3bdf740a9533be9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the color of the shirt worn by the person standing next to the person wearing the green shirt with \"wifa\" written on it?" }, { "from": "gpt", "value": "The answer is YOUR CORRECT ANSWER" } ], "clip_score": 11.53125, "loss": 2.7809078693389893 }, { "id": "identity_43732", "image": "FigureQA/images/34256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents Aqua in the pie chart?" }, { "from": "gpt", "value": "The answer is Light blue" } ], "clip_score": 36.0, "loss": 3.1609065532684326 }, { "id": "identity_169713", "image": "IconQA/images/102840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents a stick and is broken into two pieces at the 4cm mark, what is the length of the shorter piece?" }, { "from": "gpt", "value": "The answer is 4 centimeters" } ], "clip_score": 31.40625, "loss": 1.8198621273040771 }, { "id": "identity_197430", "image": "TextVQA/images/fb175d04a9d56ec7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the leaves on the tree to the right in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 8.2578125, "loss": 3.5441951751708984 }, { "id": "identity_61512", "image": "DocVQA/images/nznm0227_31.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last session scheduled in the Shoreham Forum room for the week?" }, { "from": "gpt", "value": "The answer is APS workshop" } ], "clip_score": 18.96875, "loss": 3.9930806159973145 }, { "id": "identity_188645", "image": "TextVQA/images/84907cf5f16cf785.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is written on the top right corner of the image?" }, { "from": "gpt", "value": "The answer is 1952" } ], "clip_score": 16.640625, "loss": 1.6029925346374512 }, { "id": "identity_330028", "image": "TextVQA/images/272833ec5655d8de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of genre?" }, { "from": "gpt", "value": "The answer is neopop" } ], "clip_score": 15.3125, "loss": 3.9128129482269287 }, { "id": "identity_23374", "image": "TextVQA/images/e1fc432e681bc41f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what time did it take place?" }, { "from": "gpt", "value": "The answer is 5:47 pm" } ], "clip_score": 15.328125, "loss": 2.4531023502349854 }, { "id": "identity_284479", "image": "DocVQA/images/kxmg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is specified as the check date in the provided form?" }, { "from": "gpt", "value": "The answer is 08 31" } ], "clip_score": 19.265625, "loss": 2.8838932514190674 }, { "id": "identity_56955", "image": "DVQA/images/bar_train_00182685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of people who preferred objects in the category 'march'?" }, { "from": "gpt", "value": "The answer is 140%" } ], "clip_score": 32.375, "loss": 2.022170066833496 }, { "id": "identity_42812", "image": "DocVQA/images/hqvw0217_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the article authored by Schneider HPG?" }, { "from": "gpt", "value": "The answer is Overview of the comparative efficacy, safety, bleeding profile, and effects on lipids and body weight of a sequential hormone replacement therapy preparation containing 2 mg 17β-estradiol and 0.5 mg trimegestone" } ], "clip_score": 18.59375, "loss": 2.064470052719116 }, { "id": "identity_47094", "image": "DocVQA/images/qhxn0020_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total quarterly dollar amount for the contract?" }, { "from": "gpt", "value": "The answer is 375.00" } ], "clip_score": 22.390625, "loss": 1.7390896081924438 }, { "id": "identity_237287", "image": "ScienceQA/images/13385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state of matter of the water in the bathtub, and explain how the molecular structure of water changes when it transitions from a solid to a liquid?" }, { "from": "gpt", "value": "The answer is Liquid; molecules move freely, breaking and reforming hydrogen bonds" } ], "clip_score": 25.609375, "loss": 2.888784170150757 }, { "id": "identity_241278", "image": "VQA2.0/images/209809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the specific decorations and colors on the cake that indicate it is for a baby boy, and how do they collectively suggest this?" }, { "from": "gpt", "value": "The answer is Blue decorations, number \"1\" candle, children's show characters" } ], "clip_score": 31.53125, "loss": 3.1971828937530518 }, { "id": "identity_63207", "image": "AI2D/images/1468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do we call the mixture of particles represented by the grey cloud above the volcano?" }, { "from": "gpt", "value": "The answer is Ash and steam" } ], "clip_score": 31.0, "loss": 2.967965841293335 }, { "id": "identity_243898", "image": "FigureQA/images/74728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total area under the curve for each color, and which color has the second largest area under the curve?" }, { "from": "gpt", "value": "The answer is Deep Sky Blue" } ], "clip_score": 29.125, "loss": 3.163470983505249 }, { "id": "identity_201848", "image": "PMC-VQA/images/PMC8946311_cancers-14-01509-f002_230142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are indicated by the green arrows in the image?" }, { "from": "gpt", "value": "The answer is Lymphocytes" } ], "clip_score": 27.15625, "loss": 2.519773244857788 }, { "id": "identity_321118", "image": "TextVQA/images/d28f9e9f6ae4d3ff.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much is the prize?" }, { "from": "gpt", "value": "The answer is 30,000" } ], "clip_score": 12.984375, "loss": 1.3960261344909668 }, { "id": "identity_133675", "image": "Super-CLEVR/images/superCLEVR_new_000293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the green motorcycle?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.875, "loss": 2.1059250831604004 }, { "id": "identity_58692", "image": "MapQA/images/map_8651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value range for the state of Texas?" }, { "from": "gpt", "value": "The answer is 800k-820k" } ], "clip_score": 24.484375, "loss": 1.6630059480667114 }, { "id": "identity_79988", "image": "GeoQA+/images/4841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AD and BE are angle bisectors intersecting at O, what is the measure of ∠AOB?" }, { "from": "gpt", "value": "The answer is 114°" } ], "clip_score": 33.28125, "loss": 2.549520969390869 }, { "id": "identity_101674", "image": "TabMWP/images/13998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more entries were there in 2014 compared to 2018?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 32.5625, "loss": 2.2185001373291016 }, { "id": "identity_15777", "image": "Super-CLEVR/images/superCLEVR_new_001438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the big yellow bicycle and the bicycle that is behind the big metal road bike made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 27.15625, "loss": 5.428262710571289 }, { "id": "identity_278814", "image": "Geometry3K/images/262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that each pair of polygons is similar, what is the value of B C? \n(A) 6.25\n(B) 8\n(C) 10\n(D) 12" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.75, "loss": 3.374401569366455 }, { "id": "identity_1305", "image": "TQA/images/question_images/atomic_structure_8008.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following can be found inside of a Proton?\nChoices:\n(A) Quark\n(B) Nucleus\n(C) Electron\n(D) Neutron" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.6875, "loss": 3.9343528747558594 }, { "id": "identity_239066", "image": "TextVQA/images/12c82a20c9c89c02.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the station where the train on the left is located, and how many people are visible on the platform?" }, { "from": "gpt", "value": "The answer is The station name is not visible, and there are five people" } ], "clip_score": 29.46875, "loss": 2.9901981353759766 }, { "id": "identity_332776", "image": "IconQA/images/48544.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this object have a square as a face?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.5, "loss": 3.789456844329834 }, { "id": "identity_68211", "image": "AI2D/images/3312.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part in the image is the site of cellular respiration?" }, { "from": "gpt", "value": "The answer is Mitochondrion" } ], "clip_score": 27.328125, "loss": 1.516431212425232 }, { "id": "identity_288613", "image": "Geometry3K/images/1819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nOptions:\n(A) 33\n(B) 51\n(C) 84\n(D) 117" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.015625, "loss": 2.6035656929016113 }, { "id": "identity_38142", "image": "PMC-VQA/images/PMC7713440_Fig1_314367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What condition is the patient depicted in images a-i suffering from? \nChoices:\n(A) Multiple sclerosis \n(B) Crohn's disease \n(C) Autoimmune retinopathy \n(D) Astigmatism " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.15625, "loss": 4.623376846313477 }, { "id": "identity_315714", "image": "PMC-VQA/images/PMC3474213_fig4_160836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of epithelium covers the indentation of the tissue in the image?\nChoices:\n(A) Simple columnar \n(B) Stratified squamous \n(C) Pseudostratified \n(D) Cuboidal" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.078125, "loss": 4.017224311828613 }, { "id": "identity_322914", "image": "DVQA/images/bar_train_00161274.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first group of bars from here?" }, { "from": "gpt", "value": "The answer is blood" } ], "clip_score": 20.703125, "loss": 3.559032917022705 }, { "id": "identity_110696", "image": "VQA2.0/images/532583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the uniform color of the team on the field?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 19.5, "loss": 3.4477851390838623 }, { "id": "identity_225663", "image": "DocVQA/images/mjvg0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the page numbers for \"Aid to the Permanently and Totally Disabled,\" \"Aid to the Blind,\" and \"Medical Care For Public Assistance Recipients\"?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 18.28125, "loss": 2.342175006866455 }, { "id": "identity_71451", "image": "ScienceQA/images/12185.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the glue bottle?" }, { "from": "gpt", "value": "The answer is Cylindrical" } ], "clip_score": 26.046875, "loss": 2.373995542526245 }, { "id": "identity_175389", "image": "IconQA/images/21292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many turtles are there in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.828125, "loss": 2.4806010723114014 }, { "id": "identity_96753", "image": "TabMWP/images/18873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total revenue from cheese pizza sales across all restaurants?" }, { "from": "gpt", "value": "The answer is $58" } ], "clip_score": 37.84375, "loss": 2.440267562866211 }, { "id": "identity_86323", "image": "GeoQA+/images/912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given the cone in the image with a slant height of 6cm and a base radius of 3cm, what is the area of the base of the cone?" }, { "from": "gpt", "value": "The answer is 9π cm^2" } ], "clip_score": 36.03125, "loss": 1.471055269241333 }, { "id": "identity_243227", "image": "TextVQA/images/c3dfab45c2860094.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the complete phrase written on the bottom railing, and what is the phrase written on the middle railing?" }, { "from": "gpt", "value": "The answer is MASTERPIECES OF MODERN MAGIC\" and \"THE ZENITH" } ], "clip_score": 19.390625, "loss": 2.70986008644104 }, { "id": "identity_246165", "image": "ScienceQA/images/6737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the direction of the force exerted by the person on the car door, and how does it relate to the direction of the pull on the crutches?" }, { "from": "gpt", "value": "The answer is Toward himself, similar to the pull on the crutches" } ], "clip_score": 31.4375, "loss": 2.9843640327453613 }, { "id": "identity_229618", "image": "GeoQA+/images/4634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,四边形ABCD为矩形,点O为对角线的交点,∠BOC=120°,AE⊥BO交BO于点E,AB=4,若矩形的面积为32,则AE等于多少?" }, { "from": "gpt", "value": "The answer is √{5}" } ], "clip_score": 25.671875, "loss": 3.218411445617676 }, { "id": "identity_311511", "image": "UniGeo/images/352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在圆O中,已知∠AOC=140°,∠ACB=50°,则∠BAC的度数是多少?\n选项:\n(A) 20°\n(B) 40°\n(C) 50°\n(D) 70°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.453125, "loss": 2.5529696941375732 }, { "id": "identity_52974", "image": "DocVQA/images/ssjw0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the President of the Henry J. Kaiser Family Foundation?" }, { "from": "gpt", "value": "The answer is Robert J. Glaser, M.D" } ], "clip_score": 24.09375, "loss": 1.8068029880523682 }, { "id": "identity_142899", "image": "DocVQA/images/grxh0023_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total weight of the items listed on the invoice?" }, { "from": "gpt", "value": "The answer is 96" } ], "clip_score": 23.796875, "loss": 3.2554080486297607 }, { "id": "identity_98935", "image": "IconQA/images/92058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green ovals are there?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 26.53125, "loss": 2.1047568321228027 }, { "id": "identity_303915", "image": "IconQA/images/97655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this dotted line a line of symmetry?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.046875, "loss": 3.7367043495178223 }, { "id": "identity_235036", "image": "AI2D/images/2606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the gravitational pull of Earth's natural satellite affect the tides on Earth, and what are the positions of high and low tides relative to the Moon?" }, { "from": "gpt", "value": "The answer is High tides occur on the sides facing towards and away from the Moon" } ], "clip_score": 34.21875, "loss": 3.146414041519165 }, { "id": "identity_303585", "image": "GeoQA+/images/3928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 将一副直角三角板按如图所示的方式摆放在一起,其中有一个角是90°,另一个角也是90°,还有一个角是30°,一个角是45°,那么∠1+∠2等于多少?\n\n选项:\n(A) 270°\n(B) 210°\n(C) 180°\n(D) 150°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.0625, "loss": 3.0243804454803467 }, { "id": "identity_331144", "image": "ScienceQA/images/4765.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Charleston\n(B) Portland\n(C) Hartford\n(D) Augusta" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.9375, "loss": 4.475327491760254 }, { "id": "identity_121515", "image": "TextVQA/images/4037c8b4f1d31156.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the city where this stadium is located?" }, { "from": "gpt", "value": "The answer is St. Louis" } ], "clip_score": 30.734375, "loss": 2.505405902862549 }, { "id": "identity_196811", "image": "Super-CLEVR/images/superCLEVR_new_001986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned at the far right of the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 26.65625, "loss": 3.537095546722412 }, { "id": "identity_323982", "image": "PMC-VQA/images/PMC5029079_Fig1_74770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the thoracic window shown in the images?\nChoices:\n(A) Mediastinal Window\n(B) Pulmonary Window\n(C) Abdominothoracic Window\n(D) Cardiovascular Window" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.5625, "loss": 4.148409366607666 }, { "id": "identity_151170", "image": "ScienceQA/images/4555.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state is located directly to the east of Wyoming?" }, { "from": "gpt", "value": "The answer is Nebraska" } ], "clip_score": 28.390625, "loss": 2.1785707473754883 }, { "id": "identity_253149", "image": "A-OKVQA/images/275997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of items are visible in the room, and what are they?" }, { "from": "gpt", "value": "The answer is Stuffed animals, DVDs/books, plastic bag, TV, DVD player, small bags, books, lamp, decorative items, plastic drawer unit, trash can" } ], "clip_score": 27.140625, "loss": 2.642868995666504 }, { "id": "identity_140505", "image": "DocVQA/images/lhjh0227_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date on the letter?" }, { "from": "gpt", "value": "The answer is January 13, 1969" } ], "clip_score": 21.0, "loss": 1.7787126302719116 }, { "id": "identity_125346", "image": "TextVQA/images/19a1d5b0855774bc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is it on the clock in the image?" }, { "from": "gpt", "value": "The answer is 10:30" } ], "clip_score": 23.046875, "loss": 1.677079439163208 }, { "id": "identity_270024", "image": "GeoQA+/images/4880.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and C lie on circle O, with angle BOC measuring 100°. What is the degree measure of angle BAC?\nChoices:\n(A) 70°\n(B) 60°\n(C) 50°\n(D) 40°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.4375, "loss": 2.5098717212677 }, { "id": "identity_226955", "image": "ScienceQA/images/20356.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the top magnet is rotated 180 degrees around its horizontal axis, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.421875, "loss": 3.212376117706299 }, { "id": "identity_176107", "image": "TabMWP/images/296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Edmond need to buy a bearded dragon and a hedgehog?" }, { "from": "gpt", "value": "The answer is $174" } ], "clip_score": 32.125, "loss": 2.4293320178985596 }, { "id": "identity_258296", "image": "PMC-VQA/images/PMC6487530_Fig7_472532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences between the imaging technique used in the image and an MRI in terms of the type of radiation used, the primary diagnostic applications, and the typical anatomical details visible?" }, { "from": "gpt", "value": "The answer is Radiation type, diagnostic applications, and anatomical details" } ], "clip_score": 31.953125, "loss": 2.158768892288208 }, { "id": "identity_62419", "image": "DocVQA/images/jhpc0227_26.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different sections or groups are listed in the distribution of the report?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 18.078125, "loss": 2.234102487564087 }, { "id": "identity_262022", "image": "A-OKVQA/images/57617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely outcome for the car being towed at the back of the tow truck?\nChoices:\n(A) trashed\n(B) driven away\n(C) buffed out\n(D) repaired" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.71875, "loss": 5.0266289710998535 }, { "id": "identity_237227", "image": "TabMWP/images/5137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of couches sold per day over the six days, and how does this compare to the median number of couches sold?" }, { "from": "gpt", "value": "The answer is Average ≈ 4.17, Median = 4" } ], "clip_score": 35.28125, "loss": 2.3742172718048096 }, { "id": "identity_24327", "image": "IconQA/images/25869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cans are there?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 27.921875, "loss": 1.6114792823791504 }, { "id": "identity_197675", "image": "PMC-VQA/images/PMC6292007_Fig4_411132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible blood vessels in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.328125, "loss": 3.043156385421753 }, { "id": "identity_42979", "image": "PlotQA/images/10071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country has the highest aid received in 2012?" }, { "from": "gpt", "value": "The answer is Latvia" } ], "clip_score": 24.71875, "loss": 3.5380258560180664 }, { "id": "identity_20566", "image": "Geometry3K/images/952.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Trapezoid M N P Q has an area of 360 square feet. Find the length of M N.\nChoices:\n(A) 14\n(B) 16\n(C) 18\n(D) 20" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.9375, "loss": 2.8934264183044434 }, { "id": "identity_134554", "image": "DVQA/images/bar_train_00028645.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the bar labeled \"belt\"?" }, { "from": "gpt", "value": "The answer is Approximately 70" } ], "clip_score": 19.28125, "loss": 2.963829755783081 }, { "id": "identity_184452", "image": "PMC-VQA/images/PMC3978227_F4_279485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification of the inset image labeled 'A'?" }, { "from": "gpt", "value": "The answer is The magnification level is not visible" } ], "clip_score": 25.109375, "loss": 3.2523088455200195 }, { "id": "identity_14098", "image": "TQA/images/abc_question_images/human_system_respiratory_11403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter represents the Larynx?\nChoices:\n(A) T\n(B) A\n(C) V\n(D) X" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.265625, "loss": 4.077448844909668 }, { "id": "identity_43244", "image": "DVQA/images/bar_train_00162714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item \"mature\" across all stores?" }, { "from": "gpt", "value": "The answer is an estimated 14 units" } ], "clip_score": 32.21875, "loss": 3.1152567863464355 }, { "id": "identity_81361", "image": "UniGeo/images/3280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点A、B、C、D在⊙O上,OB⊥AC.若∠BOC=56°,则∠BOD的大小为多少度?" }, { "from": "gpt", "value": "The answer is 124°" } ], "clip_score": 31.9375, "loss": 2.0554563999176025 }, { "id": "identity_237497", "image": "A-OKVQA/images/390086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the ingredients in the dish and determine which ingredient is unique based on its preparation method or presentation" }, { "from": "gpt", "value": "The answer is meat" } ], "clip_score": 20.3125, "loss": 5.893764495849609 }, { "id": "identity_29269", "image": "MapQA/images/map_112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Vermont ?" }, { "from": "gpt", "value": "The answer is 57.3%-66.5%" } ], "clip_score": 23.625, "loss": 2.3182005882263184 }, { "id": "identity_117731", "image": "PMC-VQA/images/PMC4328627_F1_358320.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is the marker indicating the area of interest in the LGE image with 'Event -'?" }, { "from": "gpt", "value": "The answer is Triangle" } ], "clip_score": 19.015625, "loss": 3.184349775314331 }, { "id": "identity_68694", "image": "AI2D/images/1098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pointed lobes does the leaf have?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 30.546875, "loss": 3.4043684005737305 }, { "id": "identity_144438", "image": "DocVQA/images/hlmf0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is ACD Management Review" } ], "clip_score": 31.296875, "loss": 2.2271227836608887 }, { "id": "identity_263199", "image": "VizWiz/images/VizWiz_train_00012432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is glass water" } ], "clip_score": 30.296875, "loss": 3.76977801322937 }, { "id": "identity_17191", "image": "VQA2.0/images/298182.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bike?" }, { "from": "gpt", "value": "The answer is yellow" } ], "clip_score": 27.140625, "loss": 4.0228657722473145 }, { "id": "identity_174827", "image": "IconQA/images/71105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many marbles are in the second row from the top?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 25.453125, "loss": 2.494580030441284 }, { "id": "identity_104206", "image": "VQA2.0/images/183847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of terrain is the man walking on?" }, { "from": "gpt", "value": "The answer is A groomed ski slope" } ], "clip_score": 25.125, "loss": 3.6253573894500732 }, { "id": "identity_322727", "image": "A-OKVQA/images/556722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the pink circles on the front of this used for?\nChoices:\n(A) visibility\n(B) light emitting\n(C) design\n(D) brand" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.1875, "loss": 4.447451114654541 }, { "id": "identity_90400", "image": "UniGeo/images/1705.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is parallel to CD and the measure of angle 1 is 60 degrees, what is the measure of angle 3?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 30.953125, "loss": 1.4797706604003906 }, { "id": "identity_225499", "image": "TextVQA/images/ef51768d4b1b7253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total number of events scheduled for February 6th?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 16.171875, "loss": 2.111135244369507 }, { "id": "identity_311802", "image": "ScienceQA/images/12563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms contains matter that was once part of this plant?\nChoices:\n(A) lichen\n(B) bear sedge\n(C) barren-ground caribou\n(D) parasitic jaeger" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.640625, "loss": 4.634925842285156 }, { "id": "identity_141433", "image": "MapQA/images/map_8459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the map?" }, { "from": "gpt", "value": "The answer is The Percentage of needs in the USA" } ], "clip_score": 34.625, "loss": 1.755133867263794 }, { "id": "identity_274591", "image": "DocVQA/images/ymkm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the recipient of the letter?" }, { "from": "gpt", "value": "The answer is Dr. Samuel M. Weisberg" } ], "clip_score": 24.40625, "loss": 3.193449020385742 }, { "id": "identity_66650", "image": "ScienceQA/images/1980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pink particles are present in Solution A?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.203125, "loss": 2.8138973712921143 }, { "id": "identity_47558", "image": "ChartQA/images/multi_col_80192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the least number of establishments in 2015?" }, { "from": "gpt", "value": "The answer is Sound recording studios" } ], "clip_score": 26.125, "loss": 2.5341506004333496 }, { "id": "identity_180042", "image": "TabMWP/images/6567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the closest price to the average cost of all items?" }, { "from": "gpt", "value": "The answer is Program signed by a famous dancer ($54.32)" } ], "clip_score": 23.453125, "loss": 2.2500791549682617 }, { "id": "identity_92733", "image": "IconQA/images/88763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one star represents 2 points, how many points are represented by all the stars on the frame?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 37.375, "loss": 2.357273817062378 }, { "id": "identity_221583", "image": "A-OKVQA/images/557721.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cones are visible on the ground in a line?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 17.359375, "loss": 2.114992618560791 }, { "id": "identity_194954", "image": "TextVQA/images/1975eff00531f58f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the issue number of this publication?" }, { "from": "gpt", "value": "The answer is No.12" } ], "clip_score": 14.4140625, "loss": 2.4856925010681152 }, { "id": "identity_173130", "image": "TabMWP/images/37988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of offices per floor?" }, { "from": "gpt", "value": "The answer is 29" } ], "clip_score": 28.09375, "loss": 3.0061588287353516 }, { "id": "identity_164219", "image": "UniGeo/images/177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that triangle ABC is inscribed in the circle with center O, and ∠A = 36 degrees, find the measure of ∠ACB" }, { "from": "gpt", "value": "The answer is 72 degrees" } ], "clip_score": 36.0625, "loss": 1.9498710632324219 }, { "id": "identity_254632", "image": "A-OKVQA/images/334400.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Considering the man's yellow shirt is for visibility, identify two other elements in the image that contribute to safety and explain their roles" }, { "from": "gpt", "value": "The answer is Stop sign and crosswalk ensure safety" } ], "clip_score": 22.265625, "loss": 4.0807390213012695 }, { "id": "identity_125099", "image": "A-OKVQA/images/401842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing are the two individuals wearing?" }, { "from": "gpt", "value": "The answer is Pants and long-sleeved tops" } ], "clip_score": 12.015625, "loss": 2.1106019020080566 }, { "id": "identity_257592", "image": "PMC-VQA/images/PMC4821972_fig4_18602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristics of the region indicated by the red arrow in the FA image, and how do these characteristics help in diagnosing the condition?" }, { "from": "gpt", "value": "The answer is Hyperfluorescence indicates CNV due to abnormal blood vessels" } ], "clip_score": 30.953125, "loss": 3.498831033706665 }, { "id": "identity_54975", "image": "MapQA/images/map_5371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the highest percentage of servants?" }, { "from": "gpt", "value": "The answer is Mississippi" } ], "clip_score": 34.09375, "loss": 2.6088690757751465 }, { "id": "identity_258324", "image": "ScienceQA/images/10728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the smallest U.S. state by area and name the state it belongs to" }, { "from": "gpt", "value": "The answer is Providence, Rhode Island" } ], "clip_score": 29.703125, "loss": 1.6793715953826904 }, { "id": "identity_176056", "image": "TabMWP/images/18273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of watermelons harvested from Thursday to Sunday?" }, { "from": "gpt", "value": "The answer is 21,991" } ], "clip_score": 33.21875, "loss": 2.3612406253814697 }, { "id": "identity_122854", "image": "A-OKVQA/images/165492.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bench near the boat?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 34.125, "loss": 2.886101245880127 }, { "id": "identity_289333", "image": "CLEVR-Math/images/CLEVR_train_010789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the green blocks and blue balls, how many blocks are remaining?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.03125, "loss": 2.3230364322662354 }, { "id": "identity_58451", "image": "DocVQA/images/yscw0217_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source link of the document?" }, { "from": "gpt", "value": "The answer is https://www.industrydocuments.ucsf.edu/docs/yscw0217" } ], "clip_score": 21.609375, "loss": 1.8053146600723267 }, { "id": "identity_84013", "image": "UniGeo/images/1025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of circle O and CD is a chord of circle O, and the measure of angle ABD is 58 degrees, what is the measure of angle BDC?" }, { "from": "gpt", "value": "The answer is 29 degrees" } ], "clip_score": 36.03125, "loss": 2.1617965698242188 }, { "id": "identity_184594", "image": "PMC-VQA/images/PMC7935200_FIG2_385034.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the spleen be visualized in this image?" }, { "from": "gpt", "value": "The answer is No, the spleen is not visible" } ], "clip_score": 31.1875, "loss": 1.9463272094726562 }, { "id": "identity_151642", "image": "ScienceQA/images/7110.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which story is set in a medieval context?" }, { "from": "gpt", "value": "The answer is A Connecticut Yankee in King Arthur's Court" } ], "clip_score": 30.859375, "loss": 1.2170342206954956 }, { "id": "identity_309183", "image": "DVQA/images/bar_train_00160165.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the left in these groups?" }, { "from": "gpt", "value": "The answer is folk" } ], "clip_score": 18.828125, "loss": 4.462826251983643 }, { "id": "identity_54596", "image": "DocVQA/images/jkmm0020_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Outlet Type?" }, { "from": "gpt", "value": "The answer is PACK/CTN/CTS" } ], "clip_score": 21.828125, "loss": 4.214465141296387 }, { "id": "identity_209456", "image": "VQA-AS/images/18470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it likely to be based on the position of the sun in the image?" }, { "from": "gpt", "value": "The answer is Morning or late afternoon" } ], "clip_score": 23.296875, "loss": 3.0547304153442383 }, { "id": "identity_193457", "image": "VQA2.0/images/673.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of landscape is visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Coastal landscape" } ], "clip_score": 15.1796875, "loss": 3.3613851070404053 }, { "id": "identity_267638", "image": "TabMWP/images/29541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Lillian requires to purchase both an atlas and an encyclopedia?" }, { "from": "gpt", "value": "The answer is 31" } ], "clip_score": 25.03125, "loss": 3.0933778285980225 }, { "id": "identity_211743", "image": "VQA2.0/images/197754.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any human presence visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 17.703125, "loss": 2.660060405731201 }, { "id": "identity_173231", "image": "TabMWP/images/30863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median score of the game show contestants?" }, { "from": "gpt", "value": "The answer is 43" } ], "clip_score": 25.40625, "loss": 2.9580318927764893 }, { "id": "identity_218470", "image": "VizWiz/images/VizWiz_train_00007203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is in the room?" }, { "from": "gpt", "value": "The answer is Tiled" } ], "clip_score": 23.734375, "loss": 3.8300468921661377 }, { "id": "identity_243949", "image": "AI2D/images/4019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the layers of the Earth starting from the crust to the inner core, and what are their states (solid or liquid)?" }, { "from": "gpt", "value": "The answer is Crust (solid), Mantle (solid), Outer core (liquid), Inner core (solid)" } ], "clip_score": 35.78125, "loss": 0.7417559623718262 }, { "id": "identity_334007", "image": "GeoQA+/images/651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,把一个角沿着直线MN平移一定的距离,得到另一个角,若∠AOM=40°,∠DPN=40°,则∠AOB的度数为()\nChoices:\n(A) 100°\n(B) 110°\n(C) 120°\n(D) 130°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.859375, "loss": 2.3326528072357178 }, { "id": "identity_255987", "image": "PMC-VQA/images/PMC5815779_F1_276828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps to diagnose pleural effusion and determine its severity using imaging techniques, and which imaging technique is most commonly used for initial diagnosis?" }, { "from": "gpt", "value": "The answer is X-ray" } ], "clip_score": 30.25, "loss": 3.2343051433563232 }, { "id": "identity_126771", "image": "A-OKVQA/images/484619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tablecloth?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.453125, "loss": 4.002098083496094 }, { "id": "identity_135856", "image": "DVQA/images/bar_train_00085206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the algorithm labeled \"sake\" have an accuracy greater than 2?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.1875, "loss": 3.050044536590576 }, { "id": "identity_239341", "image": "Super-CLEVR/images/superCLEVR_new_000794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the materials of the cyan fighter and the cyan motorcycle?" }, { "from": "gpt", "value": "The answer is rubber and metal" } ], "clip_score": 28.65625, "loss": 3.0684053897857666 }, { "id": "identity_280935", "image": "Geometry3K/images/1840.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the measure of angle W?\nChoices:\n(A) 31\n(B) 62\n(C) 108\n(D) 124" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.5, "loss": 3.2655022144317627 }, { "id": "identity_17840", "image": "GeoQA+/images/709.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O中,OA⊥BC,∠AOC=50°,则∠ADB的度数为()\nChoices:\n(A) 15°\n(B) 25°\n(C) 30°\n(D) 50°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.09375, "loss": 2.6621196269989014 }, { "id": "identity_151118", "image": "ScienceQA/images/14859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What characteristic of the object in the image indicates its high value?" }, { "from": "gpt", "value": "The answer is Its composition as gold" } ], "clip_score": 30.3125, "loss": 5.320903301239014 }, { "id": "identity_43696", "image": "DocVQA/images/kkln0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ID number at the top right corner of the report?" }, { "from": "gpt", "value": "The answer is 919-741-5186" } ], "clip_score": 26.09375, "loss": 2.3098182678222656 }, { "id": "identity_278466", "image": "IconQA/images/17474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there more pieces of paper than crayons?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.203125, "loss": 3.4545950889587402 }, { "id": "identity_274904", "image": "TextVQA/images/0d9c6dd4409d6f2d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the school that is written on the lady's jersey?" }, { "from": "gpt", "value": "The answer is high point" } ], "clip_score": 25.21875, "loss": 3.6567447185516357 }, { "id": "identity_93207", "image": "TabMWP/images/13153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students scored less than 7?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 32.8125, "loss": 2.219371795654297 }, { "id": "identity_244474", "image": "VQA2.0/images/218558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many baskets are visible in the image, and what items are in each basket?" }, { "from": "gpt", "value": "The answer is two baskets: one with a sandwich, one with fries" } ], "clip_score": 24.671875, "loss": 2.0172195434570312 }, { "id": "identity_113721", "image": "TextVQA/images/e1e7f00f03334674.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is shown in the image?" }, { "from": "gpt", "value": "The answer is Sports court flooring" } ], "clip_score": 23.234375, "loss": 3.628739356994629 }, { "id": "identity_116642", "image": "VizWiz/images/VizWiz_train_00009199.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the text on the cup?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 26.765625, "loss": 4.3039116859436035 }, { "id": "identity_92949", "image": "TabMWP/images/11837.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of characters per book?" }, { "from": "gpt", "value": "The answer is 17 and 37 (two modes)" } ], "clip_score": 35.625, "loss": 2.486889600753784 }, { "id": "identity_173516", "image": "TabMWP/images/17468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of buying 1 box of bandages, 1 lip balm, 1 bottle of hand lotion, 1 toothbrush, and 1 bottle of shampoo?" }, { "from": "gpt", "value": "The answer is $14.52" } ], "clip_score": 39.3125, "loss": 2.0736539363861084 }, { "id": "identity_336162", "image": "IconQA/images/64936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a train. Which image is seventh?\nChoices:\n(A) train\n(B) plane\n(C) bear" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.40625, "loss": 4.141136169433594 }, { "id": "identity_215509", "image": "TextVQA/images/d69baf7dd2e52d62.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which production company is mentioned at the top of the poster?" }, { "from": "gpt", "value": "The answer is Universal Film, Inc" } ], "clip_score": 20.375, "loss": 2.8189074993133545 }, { "id": "identity_213966", "image": "VizWiz/images/VizWiz_train_00013716.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does this image suggest?" }, { "from": "gpt", "value": "The answer is Early morning" } ], "clip_score": 24.0625, "loss": 3.790926933288574 }, { "id": "identity_255960", "image": "ChartQA/images/multi_col_100782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the number of respondents using smartphones for traveling is 1000, what is the total number of respondents using smartphones for all phases combined?" }, { "from": "gpt", "value": "The answer is 5060" } ], "clip_score": 30.0, "loss": 2.451782464981079 }, { "id": "identity_28531", "image": "PlotQA/images/57085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the percentage of firms who use bank loans for their financing in 2004 in Low & middle income to that in World ?" }, { "from": "gpt", "value": "The answer is 0.9186206896551711" } ], "clip_score": 21.546875, "loss": 2.253314256668091 }, { "id": "identity_208063", "image": "TextVQA/images/864d42afcee37f04.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ropes are visible in the boxing ring?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.125, "loss": 3.9005870819091797 }, { "id": "identity_128459", "image": "Super-CLEVR/images/superCLEVR_new_000234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the wheels on the purple bicycle?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 22.359375, "loss": 4.137035846710205 }, { "id": "identity_330569", "image": "DocVQA/images/hmvj0223_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which report is mentioned here?" }, { "from": "gpt", "value": "The answer is Auditors report" } ], "clip_score": 21.65625, "loss": 3.284243106842041 }, { "id": "identity_224401", "image": "TQA/images/question_images/seasons_2633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of events starting from the winter solstice and ending at the autumnal equinox?" }, { "from": "gpt", "value": "The answer is Winter solstice, vernal equinox, summer solstice, autumnal equinox" } ], "clip_score": 31.828125, "loss": 1.1032812595367432 }, { "id": "identity_306623", "image": "Geometry3K/images/1884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, m \\angle 1 = 50 and m \\angle 3 = 60. Determine the measure of \\angle 2.\nChoices:\n(A) 100\n(B) 110\n(C) 120\n(D) 130" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.375, "loss": 2.457012176513672 }, { "id": "identity_87451", "image": "UniGeo/images/2336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the equilateral triangle ABC with side length 9, if BD is 3 and ∠ADE is 60°, what is the length of DE?" }, { "from": "gpt", "value": "The answer is 7.794228634059948" } ], "clip_score": 34.84375, "loss": 2.61722469329834 }, { "id": "identity_222656", "image": "A-OKVQA/images/197918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of toy is the child on the right holding?" }, { "from": "gpt", "value": "The answer is A pink hairbrush toy" } ], "clip_score": 26.421875, "loss": 2.9320666790008545 }, { "id": "identity_261736", "image": "FigureQA/images/30756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Steel Blue or Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.0625, "loss": 4.701024055480957 }, { "id": "identity_126053", "image": "PMC-VQA/images/PMC3016500_F0003_83178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of the arrows in the angiogram?" }, { "from": "gpt", "value": "The answer is The arrows are pointing to areas within the blood vessels" } ], "clip_score": 29.28125, "loss": 2.6737115383148193 }, { "id": "identity_87664", "image": "GeoQA+/images/5508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠A = 70° and BD and CD are the external angle bisectors of ∠B and ∠C respectively, what is the measure of ∠BDC?" }, { "from": "gpt", "value": "The answer is 130°" } ], "clip_score": 32.28125, "loss": 1.5770286321640015 }, { "id": "identity_160422", "image": "GeoQA+/images/1600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle AOM is 20 degrees and OM bisects angle AOC, what is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 31.859375, "loss": 1.7457473278045654 }, { "id": "identity_307035", "image": "UniGeo/images/2562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,构成伸缩门的基本图形是菱形,每个小菱形的边长为0.4米,伸缩门伸展到最宽时为8米,此时菱形的一个内角为60°。若中间隔板的厚度不计,则图中的n为多少?\n\n选项:\n(A) 10\n(B) 15\n(C) 20\n(D) 25" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.25, "loss": 2.5560860633850098 }, { "id": "identity_62391", "image": "FigureQA/images/61501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Dark Gold throughout the graph?" }, { "from": "gpt", "value": "The answer is The range is approximately 1.2 (from 91 to 92.2)" } ], "clip_score": 27.09375, "loss": 2.6709420680999756 }, { "id": "identity_316695", "image": "A-OKVQA/images/292833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type is the red vehicle?\nChoices:\n(A) hatchback\n(B) sedan\n(C) station wagon\n(D) coupe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.0625, "loss": 4.938811302185059 }, { "id": "identity_102616", "image": "CLEVR-Math/images/CLEVR_train_064215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple objects are there?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.953125, "loss": 2.334188222885132 }, { "id": "identity_157601", "image": "AI2D/images/2105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organisms are secondary consumers in this food web?" }, { "from": "gpt", "value": "The answer is Beetle larva, Ground beetle, Earwig, or Millipede" } ], "clip_score": 30.890625, "loss": 1.7175301313400269 }, { "id": "identity_231188", "image": "UniGeo/images/916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,C、D是⊙O上的两点,若∠ABC=70°,求∠ACD的度数。" }, { "from": "gpt", "value": "The answer is 230°" } ], "clip_score": 29.5625, "loss": 1.6015037298202515 }, { "id": "identity_273866", "image": "TextVQA/images/002e3dbb6a9cb03c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On the far left banner, what year is displayed?" }, { "from": "gpt", "value": "The answer is 2015" } ], "clip_score": 15.5390625, "loss": 1.4281182289123535 }, { "id": "identity_323633", "image": "PMC-VQA/images/PMC4140120_fig1_314517.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of tissue is depicted in this image?\nChoices:\n(A) Kidney tissue\n(B) Brain tissue\n(C) Lung tissue\n(D) Heart tissue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.3125, "loss": 4.645296573638916 }, { "id": "identity_217526", "image": "A-OKVQA/images/271972.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the back of the player's jersey?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 13.828125, "loss": 2.333803176879883 }, { "id": "identity_249969", "image": "IconQA/images/92280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the sequence of pictures repeats after the fifth picture, what is the 12th picture in the sequence?" }, { "from": "gpt", "value": "The answer is rocket" } ], "clip_score": 19.25, "loss": 3.06059193611145 }, { "id": "identity_46024", "image": "ChartQA/images/10733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people in Germany who prefer to help other countries deal with their problems?" }, { "from": "gpt", "value": "The answer is 43%" } ], "clip_score": 33.84375, "loss": 1.5477288961410522 }, { "id": "identity_52587", "image": "DocVQA/images/zlmg0227_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the attention line for the U.S. Army Medical Research and Development Command's distribution?" }, { "from": "gpt", "value": "The answer is Chief, Reports Branch" } ], "clip_score": 20.734375, "loss": 2.9983224868774414 }, { "id": "identity_140221", "image": "ChartQA/images/5112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Americans believe NATO does the right amount to help solve global problems?" }, { "from": "gpt", "value": "The answer is 31%" } ], "clip_score": 38.21875, "loss": 1.7273346185684204 }, { "id": "identity_9580", "image": "TextVQA/images/5cf9d6c7c4a8d247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the title of this comic?" }, { "from": "gpt", "value": "The answer is adam jack nubile thangs" } ], "clip_score": 22.59375, "loss": 1.5578230619430542 }, { "id": "identity_255170", "image": "IconQA/images/69779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Roy started jumping in the mud at 1:45 P.M. and continued for 45 minutes, what time does his watch show now?" }, { "from": "gpt", "value": "The answer is 2:30 P.M" } ], "clip_score": 26.34375, "loss": 1.1819368600845337 }, { "id": "identity_19106", "image": "DVQA/images/bar_train_00061583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest accuracy summed across all the datasets?" }, { "from": "gpt", "value": "The answer is radio" } ], "clip_score": 32.375, "loss": 2.772059917449951 }, { "id": "identity_202203", "image": "PMC-VQA/images/PMC4933353_ppat.1005739.g001_47959.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to capture this image?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT)" } ], "clip_score": 26.46875, "loss": 1.6321322917938232 }, { "id": "identity_193310", "image": "A-OKVQA/images/300753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand advertisement is visible on the outfield wall?" }, { "from": "gpt", "value": "The answer is Dunkin' Donuts" } ], "clip_score": 27.1875, "loss": 1.815111517906189 }, { "id": "identity_176428", "image": "TabMWP/images/9844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average cost per pound of all the squashes listed?" }, { "from": "gpt", "value": "The answer is $1.67" } ], "clip_score": 34.0, "loss": 1.6578679084777832 }, { "id": "identity_266030", "image": "TabMWP/images/14676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The farmer recorded the amount of cherries sold in the past 5 days. What is the median of these recorded numbers?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 24.921875, "loss": 2.854747772216797 }, { "id": "identity_14126", "image": "ChartQA/images/58486764007058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times Uzbekistan and Equatorial Guinea graph crossed each other?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.484375, "loss": 2.6786556243896484 }, { "id": "identity_238193", "image": "Super-CLEVR/images/superCLEVR_new_001989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many small-sized vehicles are there in the image, and what types are they?" }, { "from": "gpt", "value": "The answer is Four; scooter, bicycle, car, bus" } ], "clip_score": 21.953125, "loss": 3.148275852203369 }, { "id": "identity_282432", "image": "IconQA/images/80989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of stars?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 29.140625, "loss": 1.8035366535186768 }, { "id": "identity_50144", "image": "FigureQA/images/52115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have a length greater than 30?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.28125, "loss": 2.6322827339172363 }, { "id": "identity_122663", "image": "VQA-AS/images/612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the ground?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 21.4375, "loss": 3.3606948852539062 }, { "id": "identity_308239", "image": "A-OKVQA/images/482081.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many giraffes are visible in the image above?\nChoices:\n(A) four\n(B) one\n(C) five\n(D) none" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.828125, "loss": 4.220938682556152 }, { "id": "identity_188928", "image": "PMC-VQA/images/PMC4923528_fig1_45587.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the lateral ventricles symmetrical in this brain image?" }, { "from": "gpt", "value": "The answer is Yes, the lateral ventricles are symmetrical" } ], "clip_score": 34.53125, "loss": 1.508385419845581 }, { "id": "identity_203644", "image": "VQA-AS/images/15387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many butterflies are visible in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 18.609375, "loss": 2.3581936359405518 }, { "id": "identity_275312", "image": "VizWiz/images/VizWiz_train_00003563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what that is?" }, { "from": "gpt", "value": "The answer is sculpture" } ], "clip_score": 19.640625, "loss": 3.4701099395751953 }, { "id": "identity_139756", "image": "FigureQA/images/67457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any point on the graph where Light Green and Medium Purple intersect?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 39.09375, "loss": 2.601807117462158 }, { "id": "identity_157117", "image": "TQA/images/question_images/glaciers_6934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is represented by the label \"receding glacier\"?" }, { "from": "gpt", "value": "The answer is Receding glacier represents the shrinking and retreating of a glacier" } ], "clip_score": 36.625, "loss": 2.112826108932495 }, { "id": "identity_107153", "image": "Super-CLEVR/images/superCLEVR_new_001450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle that is the second largest in size?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 22.09375, "loss": 4.492901802062988 }, { "id": "identity_256873", "image": "IconQA/images/51953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line is extended by 2 centimeters, what will be its new length in centimeters?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.796875, "loss": 3.083127975463867 }, { "id": "identity_165217", "image": "UniGeo/images/1934.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the bearing of ship A from lighthouse O is N56°W and the bearing of ship B from lighthouse O is S17°E, what is the angle between the direction of ship A and the direction of ship B?" }, { "from": "gpt", "value": "The answer is 73 degrees" } ], "clip_score": 24.4375, "loss": 2.1443543434143066 }, { "id": "identity_204836", "image": "A-OKVQA/images/324082.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many American flags can be seen in this image?" }, { "from": "gpt", "value": "The answer is Five" } ], "clip_score": 22.3125, "loss": 4.153896808624268 }, { "id": "identity_76180", "image": "AI2D/images/4309.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many planets are shown in this solar system diagram?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 31.640625, "loss": 1.9626184701919556 }, { "id": "identity_276139", "image": "VQA-RAD/images/synpic18319.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide a description of the abnormalities found in the lungs?" }, { "from": "gpt", "value": "The answer is Pulmonary nodules" } ], "clip_score": 29.796875, "loss": 3.300968647003174 }, { "id": "identity_68364", "image": "ScienceQA/images/15236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state located at the southwestern corner of the map?" }, { "from": "gpt", "value": "The answer is California" } ], "clip_score": 26.8125, "loss": 3.0873188972473145 }, { "id": "identity_211910", "image": "VQA-AS/images/15816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plant is on the table?" }, { "from": "gpt", "value": "The answer is A potted houseplant" } ], "clip_score": 26.84375, "loss": 2.722501039505005 }, { "id": "identity_47580", "image": "FigureQA/images/87018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value that Blue reaches on the graph?" }, { "from": "gpt", "value": "The answer is Approximately 65" } ], "clip_score": 23.640625, "loss": 2.5447542667388916 }, { "id": "identity_179300", "image": "IconQA/images/70061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each heart is duplicated, how many hearts will there be in total?" }, { "from": "gpt", "value": "The answer is 12 hearts" } ], "clip_score": 27.953125, "loss": 1.853085994720459 }, { "id": "identity_38528", "image": "A-OKVQA/images/251427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why can't they travel?\nChoices:\n(A) no wind\n(B) too rainy\n(C) no gas\n(D) no water" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.53125, "loss": 4.224008083343506 }, { "id": "identity_22028", "image": "A-OKVQA/images/7952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What could happen if the white truck parks a few feet directly ahead?\nChoices:\n(A) parade\n(B) collision\n(C) movie\n(D) sunset" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.4375, "loss": 4.544146537780762 }, { "id": "identity_188310", "image": "Super-CLEVR/images/superCLEVR_new_001081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more bicycles than cars in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.03125, "loss": 3.1101980209350586 }, { "id": "identity_228265", "image": "TabMWP/images/3507.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of science articles read by Audrey's classmates?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 35.96875, "loss": 2.561415910720825 }, { "id": "identity_117101", "image": "A-OKVQA/images/50355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is displayed on the blue signs above the toll booths?" }, { "from": "gpt", "value": "The answer is Chinese" } ], "clip_score": 30.890625, "loss": 2.949401617050171 }, { "id": "identity_126440", "image": "TextVQA/images/4f80bad7e3a0ea2a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many innings have been played in the baseball game at the moment this image was taken?" }, { "from": "gpt", "value": "The answer is 6 innings" } ], "clip_score": 21.640625, "loss": 2.07330584526062 }, { "id": "identity_93886", "image": "IconQA/images/95299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of number display does the digital clock use?" }, { "from": "gpt", "value": "The answer is Digital" } ], "clip_score": 30.96875, "loss": 4.152850151062012 }, { "id": "identity_141496", "image": "DocVQA/images/xkjp0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time span of the organization Alpha Zeta as mentioned at the bottom of the page?" }, { "from": "gpt", "value": "The answer is 1897 - 1976" } ], "clip_score": 23.28125, "loss": 1.7052891254425049 }, { "id": "identity_252334", "image": "GeoQA+/images/1365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在长方形纸片ABCD中,AD∥BC,将长方形纸片沿BD折叠,点A落在点E处,DE交边BC于点F,若∠ADB=20°,求∠DEF的大小。" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 25.65625, "loss": 1.7104111909866333 }, { "id": "identity_131738", "image": "A-OKVQA/images/196341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the architectural style of the building where this clock is located?" }, { "from": "gpt", "value": "The answer is Traditional or historical architectural style" } ], "clip_score": 27.203125, "loss": 2.951519727706909 }, { "id": "identity_182865", "image": "A-OKVQA/images/246183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visibly using umbrellas?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 21.609375, "loss": 4.859888076782227 }, { "id": "identity_13776", "image": "GeoQA+/images/3688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,点C在⊙O上,半径OD∥AC,如果∠BOD=130°,那么∠B的度数为()\nChoices:\n(A) 30°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 2.718827486038208 }, { "id": "identity_61314", "image": "DocVQA/images/fgbg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many events are scheduled in the month of October?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.015625, "loss": 2.5632026195526123 }, { "id": "identity_286910", "image": "PMC-VQA/images/PMC8148521_biology-10-00412-f005_450369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was the significance of the blue arrow in the CT image, indicating which of the following structures: Liver Parenchyma, Nodule, Artery, or Vein?\nChoices:\n(A) Liver Parenchyma \n(B) Nodule \n(C) Artery \n(D) Vein" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.125, "loss": 4.646682262420654 }, { "id": "identity_151916", "image": "AI2D/images/4467.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which foot type would likely require the most cushioning in footwear?" }, { "from": "gpt", "value": "The answer is Severe overpronation" } ], "clip_score": 37.15625, "loss": 2.0343613624572754 }, { "id": "identity_3052", "image": "GeoQA+/images/2997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,在Rt△ABC中∠A=25°,∠ACB=90°,以点C为圆心,BC为半径的圆交AB于一点D,交AC于点E,则∠DCE的度数为()\nChoices:\n(A) 30°\n(B) 25°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.40625, "loss": 2.63071608543396 }, { "id": "identity_130306", "image": "VizWiz/images/VizWiz_train_00016175.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the 'Submit' button on the captcha image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 17.46875, "loss": 3.864792585372925 }, { "id": "identity_252254", "image": "TabMWP/images/22918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average temperature of the readings that are at least 20°C but less than 40°C?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 31.53125, "loss": 2.486643075942993 }, { "id": "identity_6649", "image": "UniGeo/images/527.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,圆锥的母线长为2,底面圆的周长为3,则该圆锥的侧面积为()\nChoices:\n(A) 3π\n(B) 3\n(C) 6π\n(D) 6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.46875, "loss": 3.153934955596924 }, { "id": "identity_234255", "image": "TextVQA/images/032a1bbda0389235.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different players scored for Germany, and what is the average number of goals scored by each of these players?" }, { "from": "gpt", "value": "The answer is 4 players, 1.5 goals per player" } ], "clip_score": 27.171875, "loss": 1.9415737390518188 }, { "id": "identity_225446", "image": "UniGeo/images/127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知AB∥CD,DE⊥AC,垂足为E,∠A=130°,若∠C=40°,则∠D的度数是多少?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 33.4375, "loss": 1.9245930910110474 }, { "id": "identity_3640", "image": "VQA2.0/images/532583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sport is this?" }, { "from": "gpt", "value": "The answer is baseball" } ], "clip_score": 24.296875, "loss": 3.2858376502990723 }, { "id": "identity_131590", "image": "VQA-AS/images/17980.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What food items are visible on the table?" }, { "from": "gpt", "value": "The answer is Bowl of snacks, two drinks, and a slice of cake" } ], "clip_score": 13.8046875, "loss": 2.393254280090332 }, { "id": "identity_277247", "image": "A-OKVQA/images/422328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many legs does the animal have?\nOptions:\n(A) four\n(B) eight\n(C) six\n(D) two" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.828125, "loss": 3.861638307571411 }, { "id": "identity_246506", "image": "PlotQA/images/83035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average annual growth rate of merchandise exports from 1969 to 1975?" }, { "from": "gpt", "value": "The answer is 33.4%" } ], "clip_score": 25.921875, "loss": 1.9036076068878174 }, { "id": "identity_28438", "image": "VQA2.0/images/63681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What gas station is in the picture?" }, { "from": "gpt", "value": "The answer is texaco" } ], "clip_score": 14.546875, "loss": 2.9870564937591553 }, { "id": "identity_163447", "image": "GeoQA+/images/4239.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC in the equilateral triangle ABC?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 30.921875, "loss": 1.4937182664871216 }, { "id": "identity_96466", "image": "TabMWP/images/25769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many robot stamps can Bonnie buy with $2?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 29.515625, "loss": 2.6829981803894043 }, { "id": "identity_48439", "image": "ChartQA/images/9845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in percentage points between the favorable views of the U.S. in 2002 and 2015?" }, { "from": "gpt", "value": "The answer is 5%" } ], "clip_score": 26.3125, "loss": 2.5514299869537354 }, { "id": "identity_166229", "image": "IconQA/images/71566.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line were to be extended to the 7 cm mark, how much longer in centimeters would it be?" }, { "from": "gpt", "value": "The answer is 1 cm" } ], "clip_score": 29.734375, "loss": 2.198291540145874 }, { "id": "identity_35582", "image": "GeoQA+/images/2764.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如下图,AB∥EF∥CD,∠ABC=46°,∠BCE=20°,则∠CEF=()\nChoices:\n(A) 144°\n(B) 154°\n(C) 164°\n(D) 160°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0, "loss": 2.496657609939575 }, { "id": "identity_80442", "image": "GeoQA+/images/1557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB为⊙O的直径,C为⊙O外一点,过点C作⊙O的切线,切点为B,连接AC交⊙O于点D,若∠C=40°,则∠ACB的度数是多少?" }, { "from": "gpt", "value": "The answer is 140°" } ], "clip_score": 26.234375, "loss": 1.7139697074890137 }, { "id": "identity_27987", "image": "TQA/images/question_images/greenhouse_effect_6947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Steps of greenhouse effect\nChoices:\n(A) 4\n(B) 1\n(C) 5\n(D) 3" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.34375, "loss": 2.875948667526245 }, { "id": "identity_130568", "image": "A-OKVQA/images/260906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tablecloth on the table where the cake is placed?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 21.640625, "loss": 4.383149147033691 }, { "id": "identity_334566", "image": "DVQA/images/bar_train_00153639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the bars a single solid color without any patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 20.3125, "loss": 3.5895602703094482 }, { "id": "identity_252532", "image": "A-OKVQA/images/544325.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are three visual clues in the image that indicate the setting is a baseball field?" }, { "from": "gpt", "value": "The answer is Baseball glove, dirt infield, baseball attire" } ], "clip_score": 26.40625, "loss": 2.5893895626068115 }, { "id": "identity_234042", "image": "ChartQA/images/two_col_62149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total area in thousand hectares for all land types except the one with the most data on the graph?" }, { "from": "gpt", "value": "The answer is 633.77 thousand hectares" } ], "clip_score": 26.328125, "loss": 1.9070199728012085 }, { "id": "identity_182613", "image": "PMC-VQA/images/PMC4074584_F4_302022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the surface within the circular structure?" }, { "from": "gpt", "value": "The answer is Fibrous or hair-like" } ], "clip_score": 23.859375, "loss": 3.1276016235351562 }, { "id": "identity_239794", "image": "VQA-RAD/images/synpic15006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of cardiomegaly or other abnormalities in the heart size or shape?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.625, "loss": 2.638746738433838 }, { "id": "identity_149505", "image": "DocVQA/images/kydh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the project protocol being prepared?" }, { "from": "gpt", "value": "The answer is The project protocol is being prepared by Dr. Reynolds" } ], "clip_score": 24.203125, "loss": 2.791464328765869 }, { "id": "identity_154205", "image": "ScienceQA/images/9614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 31.671875, "loss": 2.1619904041290283 }, { "id": "identity_77033", "image": "TQA/images/question_images/skin_cross_section_7587.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"sweat gland\"?" }, { "from": "gpt", "value": "The answer is Regulates body temperature through sweat production" } ], "clip_score": 24.78125, "loss": 2.529773473739624 }, { "id": "identity_190119", "image": "Super-CLEVR/images/superCLEVR_new_001030.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.296875, "loss": 2.5534839630126953 }, { "id": "identity_306719", "image": "MapQA/images/map_3328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of this state?" }, { "from": "gpt", "value": "The answer is 27.1%-29.7%" } ], "clip_score": 22.671875, "loss": 1.9658360481262207 }, { "id": "identity_327313", "image": "PMC-VQA/images/PMC8867629_Fig1_201298.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging technology is used to measure certain parameters in the C4-C6 region?\nChoices:\n(A) MRI\n(B) CT scan\n(C) PET scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.21875, "loss": 3.986833095550537 }, { "id": "identity_171098", "image": "TabMWP/images/33086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of one thick black marker, one silver pen, and one bright yellow highlighter?" }, { "from": "gpt", "value": "The answer is $0.89" } ], "clip_score": 36.96875, "loss": 2.1773691177368164 }, { "id": "identity_244548", "image": "DocVQA/images/rnjf0226_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two main factors that have strengthened the already-positive perceptions of TZDs, and how do they compare in terms of their impact on prescribing habits?" }, { "from": "gpt", "value": "The answer is Speaker presentations and clinical data; clinical data alters habits" } ], "clip_score": 23.09375, "loss": 3.1658730506896973 }, { "id": "identity_272777", "image": "DocVQA/images/lffn0226_48.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the description provided in Figure 14 of this document?" }, { "from": "gpt", "value": "The answer is Looking down on to the greenhouse roof showing three cloth infuser bags inflated with clean air." } ], "clip_score": 22.453125, "loss": 3.381667375564575 }, { "id": "identity_270193", "image": "Super-CLEVR/images/superCLEVR_new_001341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the cyan object located in front of the large metal double bus?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 21.125, "loss": 3.646563768386841 }, { "id": "identity_203534", "image": "TextVQA/images/51f483fa116b962a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles have a dark liquid inside them?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.640625, "loss": 2.453334093093872 }, { "id": "identity_108925", "image": "A-OKVQA/images/175570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a traffic light visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.78125, "loss": 3.1888673305511475 }, { "id": "identity_33151", "image": "TextVQA/images/57e1ac153cde6348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand of beer is this?" }, { "from": "gpt", "value": "The answer is floris" } ], "clip_score": 36.0625, "loss": 4.0891265869140625 }, { "id": "identity_140630", "image": "FigureQA/images/34662.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color label of the bar with the highest value?" }, { "from": "gpt", "value": "The answer is Tan" } ], "clip_score": 25.4375, "loss": 3.4764180183410645 }, { "id": "identity_323532", "image": "ScienceQA/images/20142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these objects attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.765625, "loss": 3.655075788497925 }, { "id": "identity_99986", "image": "TabMWP/images/8266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average quantity demanded for the prices listed in the table?" }, { "from": "gpt", "value": "The answer is 11,700" } ], "clip_score": 35.625, "loss": 1.8388344049453735 }, { "id": "identity_11419", "image": "DVQA/images/bar_train_00025523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object is preferred by the most number of people summed across all the categories?" }, { "from": "gpt", "value": "The answer is bottle" } ], "clip_score": 29.9375, "loss": 2.335270881652832 }, { "id": "identity_78967", "image": "Geometry3K/images/543.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side x in the triangle?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.8125, "loss": 2.5739052295684814 }, { "id": "identity_275422", "image": "DVQA/images/bar_train_00054364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the entire chart, which group of bars has the highest value for an individual bar?" }, { "from": "gpt", "value": "The answer is check" } ], "clip_score": 26.4375, "loss": 4.148070335388184 }, { "id": "identity_147874", "image": "DocVQA/images/jlbx0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the current portion of long-term debt in the current liabilities?" }, { "from": "gpt", "value": "The answer is 115" } ], "clip_score": 23.265625, "loss": 3.13956880569458 }, { "id": "identity_319301", "image": "DocVQA/images/zznc0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the 1958 budget for this activity for Alaska?" }, { "from": "gpt", "value": "The answer is $ 122,000" } ], "clip_score": 22.359375, "loss": 1.9409409761428833 }, { "id": "identity_45625", "image": "DVQA/images/bar_train_00078938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the 'sand' and 'bundle' datasets for the 'liquid' category?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.03125, "loss": 3.2925384044647217 }, { "id": "identity_60468", "image": "DocVQA/images/gsbg0072_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much did the termination fee decrease from before 09/01/88 to after 09/01/88 and before 09/01/89?" }, { "from": "gpt", "value": "The answer is 200,000" } ], "clip_score": 29.296875, "loss": 1.3742971420288086 }, { "id": "identity_287588", "image": "FigureQA/images/22854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Teal or Mint?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.109375, "loss": 4.004152774810791 }, { "id": "identity_81960", "image": "UniGeo/images/2059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that O is a point inside quadrilateral ABCD, with OA=OB=OC, and ∠ABC=∠ADC=70°, what is the measure of ∠AOC?" }, { "from": "gpt", "value": "The answer is 110°" } ], "clip_score": 36.34375, "loss": 1.4414541721343994 }, { "id": "identity_233555", "image": "Geometry3K/images/33.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, AB = 2x + 3 and BC = 5x. If the diagonals AC and BD intersect at point E, and AE = 6, find the length of diagonal BD" }, { "from": "gpt", "value": "The answer is \\( 2\\sqrt{11} \\)" } ], "clip_score": 36.84375, "loss": 2.6812939643859863 }, { "id": "identity_223978", "image": "PlotQA/images/3940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the highest and lowest values for both Sanitation services and Drinking water services over the years shown?" }, { "from": "gpt", "value": "The answer is 0%" } ], "clip_score": 24.828125, "loss": 2.2769503593444824 }, { "id": "identity_323294", "image": "DocVQA/images/hnkl0052_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the telephone number here?" }, { "from": "gpt", "value": "The answer is 75955" } ], "clip_score": 20.359375, "loss": 2.7346882820129395 }, { "id": "identity_12698", "image": "TextVQA/images/88c555915e88b495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the 4 numbers being displayed?" }, { "from": "gpt", "value": "The answer is 00:00" } ], "clip_score": 18.046875, "loss": 1.4864532947540283 }, { "id": "identity_281434", "image": "A-OKVQA/images/557568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the action being performed by the player in the foreground?\nChoices:\n(A) designated runner\n(B) practicing swing\n(C) at bat\n(D) stealing base" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.265625, "loss": 4.814476013183594 }, { "id": "identity_94948", "image": "IconQA/images/77190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is placed in the first box on the frame?" }, { "from": "gpt", "value": "The answer is Pentagon" } ], "clip_score": 23.734375, "loss": 3.327113389968872 }, { "id": "identity_309349", "image": "FigureQA/images/69427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Olive?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.3515625, "loss": 3.8470301628112793 }, { "id": "identity_52839", "image": "FigureQA/images/32870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 33.09375, "loss": 3.7946128845214844 }, { "id": "identity_297490", "image": "Super-CLEVR/images/superCLEVR_new_000326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the car made of if it is the same color as the shiny cruiser?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 26.09375, "loss": 3.6077582836151123 }, { "id": "identity_300919", "image": "AI2D/images/421.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given the food web, which organism would be most directly impacted if there were no Phytoplanktons?\nChoices:\n(A) Seagull\n(B) Squid\n(C) Zooplankton\n(D) Penguin" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 39.875, "loss": 3.6152210235595703 }, { "id": "identity_120780", "image": "TextVQA/images/05e2313e9ccc17cc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the price tag displayed in the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 22.0625, "loss": 3.3676364421844482 }, { "id": "identity_166558", "image": "IconQA/images/70192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertices does this shape have?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.546875, "loss": 2.3001556396484375 }, { "id": "identity_235068", "image": "TextVQA/images/2e1b9c0ee2c6254e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific model of Hublot watch is this, considering its design features and color scheme?" }, { "from": "gpt", "value": "The answer is Hublot Big Bang Ferrari" } ], "clip_score": 33.15625, "loss": 2.408057928085327 }, { "id": "identity_331439", "image": "TextVQA/images/9e6c692bb4d72403.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What memories is the book keeping a record of?" }, { "from": "gpt", "value": "The answer is medicina" } ], "clip_score": 32.5, "loss": 3.2665507793426514 }, { "id": "identity_263094", "image": "TextVQA/images/cd057e875d1807ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand is depicted on the red rectangle?" }, { "from": "gpt", "value": "The answer is emirates" } ], "clip_score": 26.890625, "loss": 2.5971055030822754 }, { "id": "identity_94336", "image": "TabMWP/images/24188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of y for all the given x values?" }, { "from": "gpt", "value": "The answer is 11.67" } ], "clip_score": 27.59375, "loss": 1.7761075496673584 }, { "id": "identity_171261", "image": "IconQA/images/66113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Assuming each activity took the same amount of time, at what time did Brad finish packing a lunch?" }, { "from": "gpt", "value": "The answer is 7:45 AM" } ], "clip_score": 24.484375, "loss": 1.943155288696289 }, { "id": "identity_326550", "image": "A-OKVQA/images/330726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the function of the metal object in front of the building?\nChoices:\n(A) toll booth\n(B) recycling\n(C) art display\n(D) food stand" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.2578125, "loss": 4.703729629516602 }, { "id": "identity_333733", "image": "PMC-VQA/images/PMC9107200_Fig1_282135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What staining method is used in this image?\nChoices:\n(A) Hemoglobin and Ethanol\n(B) Hematoxylin and Eosin\n(C) Hemoglobin and Eosin\n(D) Hematoxylin and Ethanol" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.15625, "loss": 3.3876261711120605 }, { "id": "identity_228600", "image": "ScienceQA/images/5173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which bird's beak is adapted to catch insects and is also known for its nocturnal feeding habits?" }, { "from": "gpt", "value": "The answer is common nighthawk" } ], "clip_score": 27.359375, "loss": 2.974210262298584 }, { "id": "identity_222978", "image": "A-OKVQA/images/12020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the skateboard deck primarily?" }, { "from": "gpt", "value": "The answer is Blue and white" } ], "clip_score": 19.328125, "loss": 3.2318663597106934 }, { "id": "identity_219920", "image": "A-OKVQA/images/226594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the bibs the children are wearing?" }, { "from": "gpt", "value": "The answer is SNOW DRAGON" } ], "clip_score": 27.96875, "loss": 2.569936513900757 }, { "id": "identity_298272", "image": "A-OKVQA/images/280666.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which country do bananas originally come from?\nChoices:\n(A) Philippines\n(B) China\n(C) Peru\n(D) New Guinea" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.171875, "loss": 3.5550456047058105 }, { "id": "identity_78653", "image": "AI2D/images/1459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stages are depicted in the formation of the geological feature?" }, { "from": "gpt", "value": "The answer is Four stages" } ], "clip_score": 28.703125, "loss": 4.015280246734619 }, { "id": "identity_73712", "image": "TQA/images/abc_question_images/skin_cross_section_17581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the location of the sweat gland?" }, { "from": "gpt", "value": "The answer is S" } ], "clip_score": 26.796875, "loss": 3.015221118927002 }, { "id": "identity_77579", "image": "AI2D/images/2004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify a primary consumer in the given food web" }, { "from": "gpt", "value": "The answer is Rabbit, Mouse, or Goat" } ], "clip_score": 27.90625, "loss": 3.293259620666504 }, { "id": "identity_262492", "image": "FigureQA/images/81439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the smoothest?\nOptions:\n(A) No\n(B) Yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.203125, "loss": 3.6673378944396973 }, { "id": "identity_146860", "image": "DocVQA/images/qgwv0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tie in points between any of the nominees?" }, { "from": "gpt", "value": "The answer is Yes, there is a tie" } ], "clip_score": 20.3125, "loss": 2.1267473697662354 }, { "id": "identity_170226", "image": "IconQA/images/100508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of points on the stars?" }, { "from": "gpt", "value": "The answer is 5.33 points" } ], "clip_score": 26.25, "loss": 2.133577346801758 }, { "id": "identity_178734", "image": "TabMWP/images/24697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day were the least number of goals made?" }, { "from": "gpt", "value": "The answer is Sunday (or Tuesday or Thursday)" } ], "clip_score": 35.59375, "loss": 2.27772855758667 }, { "id": "identity_289889", "image": "VQA2.0/images/467245.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which season is currently occurring?" }, { "from": "gpt", "value": "The answer is summer" } ], "clip_score": 16.21875, "loss": 3.317248821258545 }, { "id": "identity_52116", "image": "DVQA/images/bar_train_00047060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many datasets have an accuracy below 50 for the algorithm luxury?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.59375, "loss": 2.2518112659454346 }, { "id": "identity_125396", "image": "TextVQA/images/022be8ef346e3009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are carrying blue objects?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 19.125, "loss": 4.6388349533081055 }, { "id": "identity_89311", "image": "UniGeo/images/2601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if AD = 7 cm and AB = 3 cm, and AE bisects angle BAD, what is the length of DE?" }, { "from": "gpt", "value": "The answer is 4 cm" } ], "clip_score": 30.3125, "loss": 1.7600144147872925 }, { "id": "identity_213785", "image": "VQA2.0/images/192369.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bag carried by the person on the left?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 12.171875, "loss": 3.117093801498413 }, { "id": "identity_265704", "image": "UniGeo/images/2167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, D and E are the midpoints of BC and AC, respectively. BF bisects angle ABC and intersects DE at point F. If BC is 6, what is the length of DF?\nChoices:\n(A) 3\n(B) 2\n(C) 5/2\n(D) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 36.03125, "loss": 2.400557041168213 }, { "id": "identity_230632", "image": "TextVQA/images/9f9e1b7cdf8cc9d5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different brands or companies are advertised in the image, and what are they?" }, { "from": "gpt", "value": "The answer is Eight brands or companies: GMC, Chevrolet, NASDAQ, Spike TV, Budweiser, Chang, McCain, Obama, Sununu, Lautenberg" } ], "clip_score": 22.296875, "loss": 2.8733808994293213 }, { "id": "identity_304632", "image": "UniGeo/images/1885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,弦CD垂直于AB于点M,AM=2,OM=3。则CD的长度为()\n选项:\n(A) 4\n(B) 5\n(C) 8\n(D) 16" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.375, "loss": 2.3838164806365967 }, { "id": "identity_168719", "image": "TabMWP/images/28445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days did Judy make more than 2 baskets?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.21875, "loss": 2.333611249923706 }, { "id": "identity_166459", "image": "TabMWP/images/35418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the McCullough family wants to eat as many vegetables as the Pariseau family, how many more do they need to eat?" }, { "from": "gpt", "value": "The answer is 188" } ], "clip_score": 29.765625, "loss": 2.648129940032959 }, { "id": "identity_51588", "image": "FigureQA/images/87927.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines are there in the graph?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.359375, "loss": 2.2739903926849365 }, { "id": "identity_13396", "image": "TabMWP/images/6697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Jonah need to buy a stick of butter and a package of oatmeal cookies?" }, { "from": "gpt", "value": "The answer is 6.38" } ], "clip_score": 32.21875, "loss": 2.4244449138641357 }, { "id": "identity_81742", "image": "UniGeo/images/491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given the cone's lateral surface is a sector with a radius of 4 and a central angle of 90°, what is the lateral surface area of the cone?" }, { "from": "gpt", "value": "The answer is 4π or approximately 12.57" } ], "clip_score": 30.375, "loss": 1.3869653940200806 }, { "id": "identity_250874", "image": "FigureQA/images/75601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the average rate of change for both Dark Blue and Crimson lines over the interval from x = 0 to x = 100. Which line has the smaller absolute average rate of change?" }, { "from": "gpt", "value": "The answer is Dark Blue" } ], "clip_score": 29.484375, "loss": 3.055471658706665 }, { "id": "identity_253305", "image": "VQA-AS/images/19297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many animals are present in the image, and what types are they?" }, { "from": "gpt", "value": "The answer is 1 dog, 1 rabbit, 2 birds (geese)" } ], "clip_score": 32.09375, "loss": 1.9358419179916382 }, { "id": "identity_5932", "image": "Super-CLEVR/images/superCLEVR_new_001455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the big brown chopper and the tiny chopper made of the same material?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.984375, "loss": 5.479205131530762 }, { "id": "identity_91697", "image": "Geometry3K/images/725.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle PCB?" }, { "from": "gpt", "value": "The answer is 30°" } ], "clip_score": 30.0, "loss": 2.0358145236968994 }, { "id": "identity_198449", "image": "TextVQA/images/fc103492574405e9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the player at bat?" }, { "from": "gpt", "value": "The answer is Griffey" } ], "clip_score": 26.390625, "loss": 2.7381222248077393 }, { "id": "identity_206873", "image": "PMC-VQA/images/PMC6208962_Fig9_388583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI sequence is this image likely from?" }, { "from": "gpt", "value": "The answer is T2-weighted MRI sequence" } ], "clip_score": 28.640625, "loss": 1.3951048851013184 }, { "id": "identity_188010", "image": "PMC-VQA/images/PMC6519282_mrm27498-fig-0002_480334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is used to highlight the area of interest in the brain scans?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 32.3125, "loss": 3.25666880607605 }, { "id": "identity_146983", "image": "DocVQA/images/sqcy0227_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the moderator for the Research Forum?" }, { "from": "gpt", "value": "The answer is Elwood W. Speckmann, Ph.D" } ], "clip_score": 26.09375, "loss": 3.0051207542419434 }, { "id": "identity_227220", "image": "FigureQA/images/26252.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the Brown and Light Gold sections?" }, { "from": "gpt", "value": "The answer is 0%" } ], "clip_score": 31.984375, "loss": 2.0891542434692383 }, { "id": "identity_107824", "image": "PMC-VQA/images/PMC4631946_F13_440493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the circular objects in all three images identical in number?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.59375, "loss": 2.9251694679260254 }, { "id": "identity_272449", "image": "UniGeo/images/1971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, quadrilateral ABCD is an inscribed quadrilateral in circle O. If ∠ADC = 110°, what is the measure of ∠AOC?\nChoices:\n(A) 110°\n(B) 120°\n(C) 130°\n(D) 140°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 34.5625, "loss": 2.450878381729126 }, { "id": "identity_154189", "image": "TQA/images/abc_question_images/cycle_water_14915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label in the image represents the body of water?" }, { "from": "gpt", "value": "The answer is Label Y" } ], "clip_score": 29.796875, "loss": 3.4464056491851807 }, { "id": "identity_272419", "image": "GeoQA+/images/655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, if the two vertices of a right-angled triangle board with a 30° angle are placed on the two parallel sides of a ruler, and ∠β = 85°, then what is the value of α?\nChoices:\n(A) 155°\n(B) 145°\n(C) 135°\n(D) 125°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.625, "loss": 2.512878894805908 }, { "id": "identity_191559", "image": "A-OKVQA/images/571441.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation is predominantly seen in the elephant's habitat?" }, { "from": "gpt", "value": "The answer is The predominant vegetation is grass" } ], "clip_score": 23.75, "loss": 3.06351375579834 }, { "id": "identity_177979", "image": "IconQA/images/78845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you group the pink marbles into groups of 4, how many groups will you have and how many marbles will be left without a group?" }, { "from": "gpt", "value": "The answer is 2 groups and 2 marbles left" } ], "clip_score": 32.125, "loss": 2.0507569313049316 }, { "id": "identity_294368", "image": "VQA2.0/images/275211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this stand sell hot dogs?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.75, "loss": 3.364234209060669 }, { "id": "identity_262099", "image": "UniGeo/images/485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, it is known that the base and height of a certain cone's axial section are both 10cm. What is the lateral surface area of this cone?\nChoices:\n(A) 50√5πcm^{2}\n(B) 50πcm^{2}\n(C) 25πcm^{2}\n(D) 25√5πcm^{2}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.0625, "loss": 2.146362781524658 }, { "id": "identity_83020", "image": "GeoQA+/images/5404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,点C、D是⊙O上的点,OD⊥AC,连接DC,若∠COB=20°,则线段CD的长度是多少?" }, { "from": "gpt", "value": "The answer is CD = √(OD^2 + OC^2)" } ], "clip_score": 27.828125, "loss": 1.3666260242462158 }, { "id": "identity_326145", "image": "PMC-VQA/images/PMC9375826_FIG1_372143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the brain shows the indicated feature in the DWI image?\nChoices:\n(A) left side\n(B) right side\n(C) both sides\n(D) cannot determine" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.078125, "loss": 3.9394142627716064 }, { "id": "identity_299637", "image": "A-OKVQA/images/406734.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which event will the people take part?\nChoices:\n(A) motorcycle parade\n(B) marathon\n(C) protest\n(D) touring" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.390625, "loss": 4.24932861328125 }, { "id": "identity_8783", "image": "AI2D/images/4635.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What represents the larva stage?\nChoices:\n(A) none of the above\n(B) C\n(C) B\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.671875, "loss": 3.788414716720581 }, { "id": "identity_201763", "image": "PMC-VQA/images/PMC7538481_Fig1_266861.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely reason for performing this imaging procedure?" }, { "from": "gpt", "value": "The answer is To diagnose or assess liver/portal venous system condition" } ], "clip_score": 27.984375, "loss": 3.593945264816284 }, { "id": "identity_259394", "image": "A-OKVQA/images/9760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are in the room, and what type of animal can be seen on top of the shelf near the back doors?" }, { "from": "gpt", "value": "The answer is five people, owl" } ], "clip_score": 18.78125, "loss": 3.621812105178833 }, { "id": "identity_132462", "image": "A-OKVQA/images/185943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is located on the ground floor of the building with the \"CAPITOL\" sign?" }, { "from": "gpt", "value": "The answer is A gas station" } ], "clip_score": 19.3125, "loss": 3.7812235355377197 }, { "id": "identity_215513", "image": "VQA2.0/images/533955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape is cut out from the back of the girl's dress?" }, { "from": "gpt", "value": "The answer is Heart" } ], "clip_score": 17.984375, "loss": 4.012940406799316 }, { "id": "identity_324944", "image": "PMC-VQA/images/PMC4481350_pone.0131118.g001_400126.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging was used to identify the infarct?\nChoices:\n(A) X-ray\n(B) CT scan\n(C) Diffusion-weighted or susceptibility-weighted magnetic resonance imaging\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.40625, "loss": 4.120115280151367 }, { "id": "identity_143843", "image": "MapQA/images/map_525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for the states in the Midwest?" }, { "from": "gpt", "value": "The answer is 77 to 85" } ], "clip_score": 25.09375, "loss": 2.3026440143585205 }, { "id": "identity_162667", "image": "UniGeo/images/300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,BF平分∠ABC,CF平分∠ACB,若∠BFC=115°,则∠ACB的度数是多少?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 30.4375, "loss": 1.850348949432373 }, { "id": "identity_51650", "image": "DVQA/images/bar_train_00100598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the 'stairs' category are preferred by exactly 4 people?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 30.5625, "loss": 2.2924907207489014 }, { "id": "identity_39629", "image": "DocVQA/images/qkcw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title or degree of the person who assigned the document?" }, { "from": "gpt", "value": "The answer is PhD" } ], "clip_score": 25.515625, "loss": 2.694917678833008 }, { "id": "identity_311048", "image": "ScienceQA/images/2014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What can they trade to each get what they want?\nChoices:\n(A) Matthew can trade his tomatoes for Robert's broccoli.\n(B) Robert can trade his broccoli for Matthew's oranges.\n(C) Robert can trade his almonds for Matthew's tomatoes.\n(D) Matthew can trade his tomatoes for Robert's carrots." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.484375, "loss": 3.367504119873047 }, { "id": "identity_102921", "image": "TabMWP/images/12861.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which zoo has the highest total cost for one adult and one child ticket combined?" }, { "from": "gpt", "value": "The answer is Minnesota and Bronx" } ], "clip_score": 34.96875, "loss": 3.334944009780884 }, { "id": "identity_108342", "image": "TextVQA/images/09460ef47a513073.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the sky in the background of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 16.625, "loss": 3.2547404766082764 }, { "id": "identity_236192", "image": "ScienceQA/images/12144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the passage, identify two specific weather conditions mentioned and explain how they differ from climate" }, { "from": "gpt", "value": "The answer is Weather conditions are short-term; climate is long-term patterns" } ], "clip_score": 17.5625, "loss": 3.0969226360321045 }, { "id": "identity_57850", "image": "DVQA/images/bar_train_00082242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the values for 'boat'?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 22.59375, "loss": 2.16131329536438 }, { "id": "identity_229903", "image": "CLEVR-Math/images/CLEVR_train_004132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 1 blue metallic cube and remove 1 yellow cylinder, how many blue metallic cubes and yellow cylinders are left in total?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.96875, "loss": 2.965833902359009 }, { "id": "identity_9744", "image": "VQA-AS/images/13712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shrubs?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 16.609375, "loss": 2.7623844146728516 }, { "id": "identity_323073", "image": "DVQA/images/bar_train_00113010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of death in this category?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 26.859375, "loss": 3.090924024581909 }, { "id": "identity_216343", "image": "A-OKVQA/images/236882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the helmets on the motorcycle in the center?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 24.109375, "loss": 3.174063205718994 }, { "id": "identity_18036", "image": "TextVQA/images/01375ce0b7e9e9f6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what auto company is a sponsor?" }, { "from": "gpt", "value": "The answer is volvo" } ], "clip_score": 15.7109375, "loss": 2.2518310546875 }, { "id": "identity_59555", "image": "DocVQA/images/gldj0038_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date when the check should be dated?" }, { "from": "gpt", "value": "The answer is PERSONAL/CONFIDENTIAL MATERIAL REDACTED" } ], "clip_score": 30.234375, "loss": 1.553601861000061 }, { "id": "identity_214884", "image": "A-OKVQA/images/222322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the structure at the far end of the beach?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 24.484375, "loss": 3.710811138153076 }, { "id": "identity_218687", "image": "A-OKVQA/images/381192.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many screens are turned on?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 21.65625, "loss": 4.341095447540283 }, { "id": "identity_175058", "image": "TabMWP/images/32390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bracelets had more than 20 but less than 40 silver beads?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.46875, "loss": 2.4886832237243652 }, { "id": "identity_270234", "image": "MapQA/images/map_6716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Mississippi the state bordering Louisiana with the lowest value among them?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.671875, "loss": 2.7223057746887207 }, { "id": "identity_275806", "image": "DVQA/images/bar_train_00140046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the lowest total value when all values are added together?" }, { "from": "gpt", "value": "The answer is aim" } ], "clip_score": 25.90625, "loss": 3.8856186866760254 }, { "id": "identity_298531", "image": "ChartQA/images/two_col_101078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people who do not utilize the group chat feature in text messaging (SMS) or online messaging applications?" }, { "from": "gpt", "value": "The answer is 37" } ], "clip_score": 26.484375, "loss": 2.8875749111175537 }, { "id": "identity_65915", "image": "AI2D/images/3807.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many segments can be observed in the antenna?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 13.375, "loss": 1.9539991617202759 }, { "id": "identity_315912", "image": "VizWiz/images/VizWiz_train_00019475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this place?" }, { "from": "gpt", "value": "The answer is head stones" } ], "clip_score": 27.8125, "loss": 4.8717169761657715 }, { "id": "identity_218436", "image": "VizWiz/images/VizWiz_train_00004376.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the serving size of the product in the image?" }, { "from": "gpt", "value": "The answer is 2/3 cup (130g)" } ], "clip_score": 29.28125, "loss": 1.9996943473815918 }, { "id": "identity_200985", "image": "A-OKVQA/images/35726.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the boat's hull?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.59375, "loss": 3.8595752716064453 }, { "id": "identity_218536", "image": "PMC-VQA/images/PMC8801695_F6_180439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct blue areas can be counted in the image?" }, { "from": "gpt", "value": "The answer is Count the number of distinct blue areas and provide the number" } ], "clip_score": 17.375, "loss": 2.2340505123138428 }, { "id": "identity_169011", "image": "IconQA/images/5674.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which image appears most frequently in the sequence?" }, { "from": "gpt", "value": "The answer is The crescent moon appears most frequently" } ], "clip_score": 33.8125, "loss": 2.8303375244140625 }, { "id": "identity_299969", "image": "ScienceQA/images/7368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is being indicated by the highlighted area?\nChoices:\n(A) Tonga\n(B) Kiribati\n(C) Palau\n(D) Tuvalu" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.796875, "loss": 4.045191764831543 }, { "id": "identity_205828", "image": "TextVQA/images/02adaed82504f6a9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is predominantly used on the packaging?" }, { "from": "gpt", "value": "The answer is Spanish" } ], "clip_score": 19.5625, "loss": 3.160879373550415 }, { "id": "identity_170062", "image": "TabMWP/images/24822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest score achieved in the quiz show?" }, { "from": "gpt", "value": "The answer is 69" } ], "clip_score": 20.46875, "loss": 2.894031524658203 }, { "id": "identity_113894", "image": "A-OKVQA/images/14677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color backpack is the first man wearing?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 16.6875, "loss": 3.2409210205078125 }, { "id": "identity_317944", "image": "DVQA/images/bar_train_00120037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of this larger than trail?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.609375, "loss": 3.7393367290496826 }, { "id": "identity_296382", "image": "VQA2.0/images/486513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a vehicle present on the road?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.28125, "loss": 3.630178451538086 }, { "id": "identity_278655", "image": "FigureQA/images/99532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Rosy Brown the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.546875, "loss": 3.9268691539764404 }, { "id": "identity_232176", "image": "GeoQA+/images/1333.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,PA、PB分别是⊙O的两条切线,切点是A、B,点C在⊙O上,若∠P=50°,求∠ACB的余弦值。" }, { "from": "gpt", "value": "The answer is cos(65°)" } ], "clip_score": 26.15625, "loss": 2.0934383869171143 }, { "id": "identity_320331", "image": "CLEVR-Math/images/CLEVR_train_066325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all small yellow matte balls. Remove all cyan objects. How many items remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.28125, "loss": 2.292833089828491 }, { "id": "identity_119275", "image": "TextVQA/images/54a6ec25c242fddf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the website address at the bottom of the poster?" }, { "from": "gpt", "value": "The answer is www.inrap.fr" } ], "clip_score": 25.3125, "loss": 2.054821491241455 }, { "id": "identity_229566", "image": "FigureQA/images/31820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the Aqua and Medium Purple sections?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 37.125, "loss": 1.7875080108642578 }, { "id": "identity_309146", "image": "AI2D/images/2322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagram below illustrates the life cycle of the mountain pine beetle. Which label indicates the second summer?\nChoices:\n(A) A\n(B) J\n(C) I\n(D) E" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 44.53125, "loss": 3.5630688667297363 }, { "id": "identity_103009", "image": "CLEVR-Math/images/CLEVR_train_038639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.71875, "loss": 2.5605547428131104 }, { "id": "identity_214483", "image": "Super-CLEVR/images/superCLEVR_new_000784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car to the left of the large green matte object?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.953125, "loss": 4.8264079093933105 }, { "id": "identity_232386", "image": "VQA-AS/images/11204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the three main objects in the image, and where is each located relative to the woman sitting on the blanket?" }, { "from": "gpt", "value": "The answer is Slide to left, bicycle to right, blanket under" } ], "clip_score": 29.09375, "loss": 2.670558214187622 }, { "id": "identity_223709", "image": "A-OKVQA/images/263359.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many riders are wearing helmets?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.703125, "loss": 2.597712516784668 }, { "id": "identity_194522", "image": "VQA2.0/images/548562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of setting is the bird in?" }, { "from": "gpt", "value": "The answer is Residential area" } ], "clip_score": 24.90625, "loss": 3.344247579574585 }, { "id": "identity_145416", "image": "DVQA/images/bar_train_00051192.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of people who preferred the object sense across the three categories?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.78125, "loss": 2.417114496231079 }, { "id": "identity_67372", "image": "TQA/images/question_images/skin_cross_section_7582.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Sebaceous gland\"?" }, { "from": "gpt", "value": "The answer is Sebaceous gland secretes sebum to lubricate skin and hair" } ], "clip_score": 36.09375, "loss": 1.5938079357147217 }, { "id": "identity_237192", "image": "VizWiz/images/VizWiz_train_00012122.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the active ingredient in this asthma inhaler and what is its primary function?" }, { "from": "gpt", "value": "The answer is Albuterol sulfate, bronchodilator" } ], "clip_score": 34.84375, "loss": 2.004070520401001 }, { "id": "identity_142189", "image": "DVQA/images/bar_train_00115125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many algorithms have accuracies less than 5?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.15625, "loss": 2.522172451019287 }, { "id": "identity_199149", "image": "VQA-AS/images/1203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct trees can be identified in the background?" }, { "from": "gpt", "value": "The answer is There are three distinct trees" } ], "clip_score": 16.0, "loss": 3.37634539604187 }, { "id": "identity_166015", "image": "TabMWP/images/26404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of offices on all floors combined?" }, { "from": "gpt", "value": "The answer is 1031" } ], "clip_score": 24.828125, "loss": 2.2963831424713135 }, { "id": "identity_332191", "image": "A-OKVQA/images/423313.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who owns the company with this sign?\nChoices:\n(A) yao ming\n(B) ming khaphu\n(C) khaphu li\n(D) yao khaphu" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.6640625, "loss": 3.3102307319641113 }, { "id": "identity_152141", "image": "ScienceQA/images/7763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in dark green on the map?" }, { "from": "gpt", "value": "The answer is Kentucky" } ], "clip_score": 32.96875, "loss": 2.6244425773620605 }, { "id": "identity_198214", "image": "VQA2.0/images/422405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any non-human animals visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 9.375, "loss": 3.2176177501678467 }, { "id": "identity_238524", "image": "TabMWP/images/25332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average score of the friends who went bowling, and how does it compare to the median score?" }, { "from": "gpt", "value": "The answer is Average is 65.6, median is 68" } ], "clip_score": 32.4375, "loss": 2.3921990394592285 }, { "id": "identity_301943", "image": "MapQA/images/map_5242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this legend a continuous bar?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 13.25, "loss": 3.1783711910247803 }, { "id": "identity_337592", "image": "PMC-VQA/images/PMC7230637_children-07-00034-f034_172180.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the Doppler recording indicate in the descending aorta of a premature infant with a small ductus?\nChoices:\n(A) Abnormal systolic flow and abnormal diastolic flow.\n(B) Normal systolic flow and retrograde diastolic flow.\n(C) Normal systolic flow and normal anterograde diastolic flow.\n(D) Abnormal systolic flow and anterograde diastolic flow." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.140625, "loss": 3.966571092605591 }, { "id": "identity_39900", "image": "FigureQA/images/72794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Violet Red?" }, { "from": "gpt", "value": "The answer is 3.5" } ], "clip_score": 22.40625, "loss": 2.658268690109253 }, { "id": "identity_322025", "image": "TabMWP/images/20944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money is needed to buy 5 shapes stencils and a red kazoo?" }, { "from": "gpt", "value": "The answer is 4.40" } ], "clip_score": 27.453125, "loss": 1.9199022054672241 }, { "id": "identity_81073", "image": "GeoQA+/images/16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that ∠BCE = 70° in the inscribed quadrilateral ABCD, what is the measure of ∠CDE?" }, { "from": "gpt", "value": "The answer is 110°" } ], "clip_score": 34.3125, "loss": 1.450562834739685 }, { "id": "identity_254157", "image": "PMC-VQA/images/PMC7154549_epi16451-fig-0002_150985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which imaging maps showed the lesion clearly in a FCD IIb patient, and what is the average intensity value of the lesion area in the ICVF map?" }, { "from": "gpt", "value": "The answer is ICVF map, average intensity value is approximately 1.7" } ], "clip_score": 29.234375, "loss": 2.3922054767608643 }, { "id": "identity_257808", "image": "TabMWP/images/174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a randomly selected student has either a fear of heights or wants to be a doctor? Simplify any fractions" }, { "from": "gpt", "value": "The answer is 3/4" } ], "clip_score": 21.75, "loss": 1.73897123336792 }, { "id": "identity_125170", "image": "A-OKVQA/images/416337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the bananas visible in the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 29.640625, "loss": 2.5767500400543213 }, { "id": "identity_80390", "image": "GeoQA+/images/1845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB = AD and ∠BOD = 124°, what is the measure of ∠CAB?" }, { "from": "gpt", "value": "The answer is 28°" } ], "clip_score": 30.578125, "loss": 2.2784345149993896 }, { "id": "identity_329714", "image": "PMC-VQA/images/PMC7117604_FIG1_144076.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which side of the bronchus was affected by the lesion?\nChoices:\n(A) Left\n(B) Right\n(C) Both\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.90625, "loss": 4.265395641326904 }, { "id": "identity_175946", "image": "TabMWP/images/23510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of balloon animals requested at the party?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 15.265625, "loss": 2.2523562908172607 }, { "id": "identity_184769", "image": "VQA-AS/images/16633.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the person's trousers?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.75, "loss": 3.6473286151885986 }, { "id": "identity_264195", "image": "GeoQA+/images/5651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the circle O, AB is the diameter of the circle, AB = √{2}AC, ∠ODC = 12°. What is the degree measure of ∠DBC?\nChoices:\n(A) 80°\n(B) 102°\n(C) 120°\n(D) 135°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.59375, "loss": 2.4584593772888184 }, { "id": "identity_198715", "image": "PMC-VQA/images/PMC7410867_Fig2_229887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the imaging modality used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 34.15625, "loss": 2.399345874786377 }, { "id": "identity_211864", "image": "Super-CLEVR/images/superCLEVR_new_001411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any objects in the image that have a number displayed on them?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 15.15625, "loss": 3.0615971088409424 }, { "id": "identity_63510", "image": "AI2D/images/3260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which structure in the image is responsible for protein synthesis?" }, { "from": "gpt", "value": "The answer is Ribosomes" } ], "clip_score": 25.171875, "loss": 1.7118077278137207 }, { "id": "identity_64325", "image": "ScienceQA/images/3080.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different temperature ranges are depicted in the image?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 21.171875, "loss": 3.0468952655792236 }, { "id": "identity_45375", "image": "PlotQA/images/84357.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of documents required per shipment to import goods in the United Kingdom in 2005?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.8125, "loss": 2.3932993412017822 }, { "id": "identity_275468", "image": "FigureQA/images/489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Purple not the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.078125, "loss": 3.992067813873291 }, { "id": "identity_78288", "image": "TQA/images/question_images/human_system_respiratory_6206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the part labeled in the image where gas exchange occurs in the lungs" }, { "from": "gpt", "value": "The answer is Alveolus" } ], "clip_score": 27.59375, "loss": 1.577326774597168 }, { "id": "identity_169113", "image": "TabMWP/images/174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students have a fear of snakes?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 17.796875, "loss": 3.008310556411743 }, { "id": "identity_275654", "image": "MapQA/images/map_11939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value in the South?" }, { "from": "gpt", "value": "The answer is 4748882-5221389" } ], "clip_score": 24.015625, "loss": 2.866283893585205 }, { "id": "identity_191563", "image": "PMC-VQA/images/PMC8063182_fig0001_423112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale measurement at the bottom of the MRI image?" }, { "from": "gpt", "value": "The answer is 5 cm" } ], "clip_score": 31.40625, "loss": 2.1053855419158936 }, { "id": "identity_300415", "image": "VQA2.0/images/227402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sport does the person sitting on the bench play?" }, { "from": "gpt", "value": "The answer is football" } ], "clip_score": 23.078125, "loss": 3.2785568237304688 }, { "id": "identity_196735", "image": "PMC-VQA/images/PMC6780818_nanomaterials-09-01195-f013_47195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many particles are approximately in the cluster located at the center of the image?" }, { "from": "gpt", "value": "The answer is An estimated count based on visual inspection" } ], "clip_score": 23.84375, "loss": 3.6210949420928955 }, { "id": "identity_316822", "image": "UniGeo/images/3389.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于圆O,∠C=30°,AB=2,则圆O的半径为()\n选项:\n(A) √{3}\n(B) 2√{3}\n(C) 2\n(D) 4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.84375, "loss": 2.6876277923583984 }, { "id": "identity_115705", "image": "PMC-VQA/images/PMC5034412_Fig6_76656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale measurement indicated at the bottom of the image?" }, { "from": "gpt", "value": "The answer is 10 micrometers" } ], "clip_score": 21.140625, "loss": 1.60796320438385 }, { "id": "identity_234377", "image": "DocVQA/images/fhcx0227_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total final body weight for all treatment groups combined?" }, { "from": "gpt", "value": "The answer is 11165 g" } ], "clip_score": 24.6875, "loss": 2.7042436599731445 }, { "id": "identity_48387", "image": "FigureQA/images/64058.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines have values above 80 at the 100 mark on the x-axis?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 22.265625, "loss": 4.843110084533691 }, { "id": "identity_126612", "image": "VQA2.0/images/310981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches can be seen in the background?" }, { "from": "gpt", "value": "The answer is One" } ], "clip_score": 16.375, "loss": 4.590146064758301 }, { "id": "identity_164468", "image": "Geometry3K/images/1819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle formed by the two tangents to the circle?" }, { "from": "gpt", "value": "The answer is 96 degrees" } ], "clip_score": 36.96875, "loss": 2.456205368041992 }, { "id": "identity_257623", "image": "GeoQA+/images/4435.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,BD、CE是△ABC的两条角平分线,AN⊥BD于点N,AM⊥CE于点M,连接MN,若△ABC的周长为17,BC=7,求△AMN的面积。" }, { "from": "gpt", "value": "The answer is \\(\\frac{7}{4}\\)" } ], "clip_score": 26.53125, "loss": 2.6598548889160156 }, { "id": "identity_126062", "image": "PMC-VQA/images/PMC3535131_Fig5_176625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any soft tissue abnormalities in the X-ray?" }, { "from": "gpt", "value": "The answer is No visible abnormalities" } ], "clip_score": 29.421875, "loss": 3.421531915664673 }, { "id": "identity_250161", "image": "AI2D/images/3529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the part labeled as the protoplasm outside the nucleus of a cell and list two other components found within this part" }, { "from": "gpt", "value": "The answer is cytoplasm, mitochondria, glycogen granules" } ], "clip_score": 32.25, "loss": 1.6432431936264038 }, { "id": "identity_98084", "image": "CLEVR-Math/images/CLEVR_train_011571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract 1 black cylinder. How many objects are left?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.578125, "loss": 2.071913957595825 }, { "id": "identity_114673", "image": "A-OKVQA/images/474253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the rider's bib?" }, { "from": "gpt", "value": "The answer is 532" } ], "clip_score": 21.0625, "loss": 2.5103583335876465 }, { "id": "identity_332312", "image": "A-OKVQA/images/435579.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who observes these individuals as they engage in snowboarding?\nChoices:\n(A) judges\n(B) coal miners\n(C) no one\n(D) enemies" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.328125, "loss": 4.640621662139893 }, { "id": "identity_139526", "image": "PlotQA/images/8562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years did Malaysia have a percentage of ICT goods exported above 30%?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.453125, "loss": 2.4018945693969727 }, { "id": "identity_262909", "image": "TQA/images/question_images/human_system_respiratory_3632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which system of the human body is being represented in the image? \nOptions:\n(A) the nervous system\n(B) The Respiratory System\n(C) the reproductive system\n(D) the digestive system" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.625, "loss": 3.4868881702423096 }, { "id": "identity_199250", "image": "PMC-VQA/images/PMC5352873_fig1_140562.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the tumor affecting the left or right side of the brain?" }, { "from": "gpt", "value": "The answer is The tumor is affecting the left side of the brain" } ], "clip_score": 29.046875, "loss": 1.7456412315368652 }, { "id": "identity_272555", "image": "FigureQA/images/3310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Bubblegum the highest possible option?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.671875, "loss": 4.401447772979736 }, { "id": "identity_141731", "image": "DVQA/images/bar_train_00069078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the most consistent values for \"panic\"?" }, { "from": "gpt", "value": "The answer is ballet" } ], "clip_score": 26.15625, "loss": 3.2232329845428467 }, { "id": "identity_210533", "image": "VQA2.0/images/29189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the leash the dog is on?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.234375, "loss": 3.4208626747131348 }, { "id": "identity_228901", "image": "AI2D/images/2304.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of stages from egg mass to adult female?" }, { "from": "gpt", "value": "The answer is Egg mass, larva, nymph, adult female" } ], "clip_score": 29.75, "loss": 1.7166013717651367 }, { "id": "identity_217547", "image": "VQA2.0/images/166173.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are watching the skateboarder?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 25.546875, "loss": 2.807339906692505 }, { "id": "identity_12962", "image": "MapQA/images/map_10779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Connecticut have the lowest value in the USA ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.375, "loss": 2.4904837608337402 }, { "id": "identity_185654", "image": "VQA2.0/images/65968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What mountain range is visible in the background?" }, { "from": "gpt", "value": "The answer is San Jacinto Mountains" } ], "clip_score": 29.140625, "loss": 2.001450538635254 }, { "id": "identity_206241", "image": "PMC-VQA/images/PMC5500524_Fig2_187345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cells are visible in the image with green fluorescence?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 31.5625, "loss": 4.105861663818359 }, { "id": "identity_306124", "image": "TextVQA/images/16318cd66bcaf260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the place here?" }, { "from": "gpt", "value": "The answer is sunshine" } ], "clip_score": 24.640625, "loss": 3.914632558822632 }, { "id": "identity_270278", "image": "PMC-VQA/images/PMC6930684_Fig1_90642.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to capture the images shown? \nOptions:\n(A) Computed Tomography \n(B) Magnetic Resonance Imaging \n(C) X-Ray \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.224893569946289 }, { "id": "identity_112456", "image": "Super-CLEVR/images/superCLEVR_new_001758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are facing the left side of the image?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.34375, "loss": 2.610086441040039 }, { "id": "identity_249087", "image": "DVQA/images/bar_train_00138942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total accuracy of all algorithms for all datasets?" }, { "from": "gpt", "value": "The answer is 33" } ], "clip_score": 35.34375, "loss": 2.70202898979187 }, { "id": "identity_61544", "image": "ChartQA/images/multi_col_60856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of Established markets for both Innovative Medicines and Sandoz?" }, { "from": "gpt", "value": "The answer is 149%" } ], "clip_score": 24.765625, "loss": 2.476682424545288 }, { "id": "identity_103793", "image": "A-OKVQA/images/112377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buses can be seen in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.90625, "loss": 2.5493428707122803 }, { "id": "identity_19512", "image": "GeoQA+/images/4258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,圆O中,弦AB、CD互相垂直且相交于点P,∠A=35°,则∠B的大小是()\nChoices:\n(A) 35°\n(B) 55°\n(C) 65°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.703125, "loss": 2.3853371143341064 }, { "id": "identity_327635", "image": "A-OKVQA/images/574940.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fabric is suitable for window coverings?\nChoices:\n(A) linen\n(B) silk\n(C) nylon\n(D) cotton" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.765625, "loss": 4.378051280975342 }, { "id": "identity_50123", "image": "MapQA/images/map_1688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Texas have more homers than New York?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.59375, "loss": 2.6793532371520996 }, { "id": "identity_122342", "image": "A-OKVQA/images/146388.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Cloudy" } ], "clip_score": 15.984375, "loss": 2.5983822345733643 }, { "id": "identity_74637", "image": "ScienceQA/images/6870.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature located directly north of the state marked with a yellow dot?" }, { "from": "gpt", "value": "The answer is Vancouver Island" } ], "clip_score": 29.28125, "loss": 3.165344715118408 }, { "id": "identity_39494", "image": "FigureQA/images/67371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the x-axis?" }, { "from": "gpt", "value": "The answer is xaxis_label" } ], "clip_score": 21.703125, "loss": 3.481096029281616 }, { "id": "identity_226963", "image": "PMC-VQA/images/PMC6565797_Fig5_493341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences between MRI and CT scans in diagnosing strokes, and why might an MRI be preferred in this case?" }, { "from": "gpt", "value": "The answer is MRI provides higher resolution images of soft tissues" } ], "clip_score": 30.96875, "loss": 3.1372733116149902 }, { "id": "identity_211618", "image": "A-OKVQA/images/563645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the large structure in the background of the image?" }, { "from": "gpt", "value": "The answer is The Lincoln Memorial" } ], "clip_score": 23.171875, "loss": 2.7337663173675537 }, { "id": "identity_180198", "image": "IconQA/images/63604.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple paw prints are there in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 33.15625, "loss": 2.2726428508758545 }, { "id": "identity_180036", "image": "IconQA/images/16020.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of marbles per row?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 25.40625, "loss": 1.8651793003082275 }, { "id": "identity_324899", "image": "A-OKVQA/images/210728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time of day is depicted in the image?\nChoices:\n(A) morning\n(B) evening\n(C) afternoon\n(D) night" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 9.140625, "loss": 4.675020217895508 }, { "id": "identity_41858", "image": "FigureQA/images/72454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Medium Aqua consistently increasing?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.59375, "loss": 3.3844337463378906 }, { "id": "identity_141922", "image": "ChartQA/images/6233.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the country with the highest and lowest perception of China's growing economy being a good thing?" }, { "from": "gpt", "value": "The answer is 50% (Australia 70% - India 20%)" } ], "clip_score": 30.375, "loss": 1.5829246044158936 }, { "id": "identity_246001", "image": "DVQA/images/bar_train_00048541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold by the store represented by the darkorange color across both 'fate' and 'clouds'?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.71875, "loss": 2.310084104537964 }, { "id": "identity_192253", "image": "PMC-VQA/images/PMC5241658_f5_115343.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the coils in the SEM image of uniform thickness?" }, { "from": "gpt", "value": "The answer is No, the coils have varying thicknesses" } ], "clip_score": 27.53125, "loss": 1.886844515800476 }, { "id": "identity_77892", "image": "TQA/images/abc_question_images/human_system_nervous_16176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which letter represents the thalamus?" }, { "from": "gpt", "value": "The answer is Y" } ], "clip_score": 29.84375, "loss": 3.7559094429016113 }, { "id": "identity_208702", "image": "VQA-AS/images/17248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are there in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.546875, "loss": 4.170645713806152 }, { "id": "identity_322082", "image": "UniGeo/images/3105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB∥CD,EF⊥AB于E,EF交CD于F,己知∠2=20°,则∠1等于多少?\n选项:\n(A) 30°\n(B) 50°\n(C) 70°\n(D) 45°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.578125, "loss": 2.8826375007629395 }, { "id": "identity_106335", "image": "TextVQA/images/894802fe52c4dafb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of creature is the character on the cover fighting?" }, { "from": "gpt", "value": "The answer is Imaginary monsters" } ], "clip_score": 24.984375, "loss": 3.0654006004333496 }, { "id": "identity_115866", "image": "VQA2.0/images/61729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is parked on the left side of the street?" }, { "from": "gpt", "value": "The answer is Van" } ], "clip_score": 18.90625, "loss": 3.4375545978546143 }, { "id": "identity_47126", "image": "FigureQA/images/4558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of Light Salmon greater than 50?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.875, "loss": 3.678730010986328 }, { "id": "identity_162270", "image": "UniGeo/images/62.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If OA is the radius of circle O and chord BC is perpendicular to OA at point A, and angle ADC is 26 degrees, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 3.5 degrees" } ], "clip_score": 36.34375, "loss": 2.3167471885681152 }, { "id": "identity_280968", "image": "PlotQA/images/21941.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the yearly average energy consumption?" }, { "from": "gpt", "value": "The answer is 978.4786124997596" } ], "clip_score": 20.703125, "loss": 2.629655599594116 }, { "id": "identity_320716", "image": "TextVQA/images/617ce40fdfb99184.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the shirt?" }, { "from": "gpt", "value": "The answer is hilingdon" } ], "clip_score": 18.0, "loss": 4.969935417175293 }, { "id": "identity_198621", "image": "PMC-VQA/images/PMC3418000_F2_149475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical imaging technique used to obtain this image?" }, { "from": "gpt", "value": "The answer is Computed Tomography (CT)" } ], "clip_score": 29.46875, "loss": 1.7082451581954956 }, { "id": "identity_241614", "image": "GeoQA+/images/2247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图所示,在△ABC中,已知点D,E,F分别为边BC,AD,CE的中点,且S△ABC=4cm²,求△DEF的周长。" }, { "from": "gpt", "value": "The answer is The perimeter of △DEF is half of the perimeter of △ABC" } ], "clip_score": 25.640625, "loss": 1.401857614517212 }, { "id": "identity_123748", "image": "PMC-VQA/images/PMC7148584_fig3_149588.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other shapes present in the image besides the markers?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.34375, "loss": 3.236295700073242 }, { "id": "identity_197786", "image": "A-OKVQA/images/385709.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the umpire's uniform?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 27.375, "loss": 3.1133885383605957 }, { "id": "identity_4872", "image": "UniGeo/images/1641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,C、D是线段AB上的两个点,CD=3cm,M是AC的中点,N是DB的中点,AB=9.8cm,那么线段MN的长等于()\nChoices:\n(A) 5.4cm\n(B) 6.4cm\n(C) 6.8cm\n(D) 7cm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.65625, "loss": 2.5189242362976074 }, { "id": "identity_25121", "image": "ScienceQA/images/5636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the following statement about our solar system true or false?\nThe smallest planet is made mainly of rock.\nChoices:\n(A) false\n(B) true" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.53125, "loss": 4.005278587341309 }, { "id": "identity_119654", "image": "TextVQA/images/d7481a13ffca5083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of structure is being constructed in the image?" }, { "from": "gpt", "value": "The answer is A bridge" } ], "clip_score": 17.453125, "loss": 3.7448580265045166 }, { "id": "identity_319476", "image": "VQA2.0/images/539640.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these all vegetables?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 7.2421875, "loss": 3.2472519874572754 }, { "id": "identity_3312", "image": "FigureQA/images/82361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Gray have the minimum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.125, "loss": 3.854849338531494 }, { "id": "identity_166626", "image": "IconQA/images/23729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of triangle vertices present in the image?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 27.5625, "loss": 2.0128190517425537 }, { "id": "identity_119442", "image": "VQA-AS/images/4805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the picnic blanket?" }, { "from": "gpt", "value": "The answer is Red and white checkered" } ], "clip_score": 32.4375, "loss": 2.74416184425354 }, { "id": "identity_197071", "image": "TextVQA/images/ea8c7599cc7f7b5b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is the headless horseman holding up?" }, { "from": "gpt", "value": "The answer is A pumpkin" } ], "clip_score": 23.671875, "loss": 2.502056837081909 }, { "id": "identity_27708", "image": "VQA2.0/images/326143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car next to the black truck?" }, { "from": "gpt", "value": "The answer is van" } ], "clip_score": 24.59375, "loss": 4.701593399047852 }, { "id": "identity_116185", "image": "VQA2.0/images/330402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of food is scattered on the ground near the animals?" }, { "from": "gpt", "value": "The answer is Fruits and vegetables" } ], "clip_score": 23.8125, "loss": 2.1574134826660156 }, { "id": "identity_192457", "image": "PMC-VQA/images/PMC5784892_pone.0190878.g002_267678.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the brain tissue in the MRI image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 35.59375, "loss": 3.729266405105591 }, { "id": "identity_143682", "image": "PlotQA/images/122776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in rural population percentage between Thailand in 1960 and 1962?" }, { "from": "gpt", "value": "The answer is Approximately 2,000,000" } ], "clip_score": 27.28125, "loss": 1.9002878665924072 }, { "id": "identity_40506", "image": "DocVQA/images/lhhx0023_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of the document?" }, { "from": "gpt", "value": "The answer is Tax Stamps" } ], "clip_score": 24.59375, "loss": 3.6124470233917236 }, { "id": "identity_117312", "image": "Super-CLEVR/images/superCLEVR_new_000142.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the green rubber bicycle larger than the blue rubber bicycle?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 15.828125, "loss": 3.3344337940216064 }, { "id": "identity_159391", "image": "GeoQA+/images/1914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, what is the measure of ∠CDB?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 34.40625, "loss": 2.5476417541503906 }, { "id": "identity_153903", "image": "ScienceQA/images/3117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label is directly to the left of the Fire Department?" }, { "from": "gpt", "value": "The answer is School" } ], "clip_score": 23.59375, "loss": 3.9521689414978027 }, { "id": "identity_195937", "image": "VQA2.0/images/364571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the people in the image likely engaged in?" }, { "from": "gpt", "value": "The answer is Safari or wildlife tour" } ], "clip_score": 29.78125, "loss": 2.7125942707061768 }, { "id": "identity_217242", "image": "VQA2.0/images/425550.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors are the visible pizza toppings?" }, { "from": "gpt", "value": "The answer is Red, yellow, green, and brown" } ], "clip_score": 26.40625, "loss": 2.428898811340332 }, { "id": "identity_289659", "image": "ScienceQA/images/4431.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which pots were included in the experimental group in this experiment?\nChoices:\n(A) the pots with salted water\n(B) the pots with pure water" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.515625, "loss": 3.6558666229248047 }, { "id": "identity_265684", "image": "VQA-RAD/images/synpic51926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does a pancreatic pseudocyst exist?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.671875, "loss": 3.2796695232391357 }, { "id": "identity_113637", "image": "A-OKVQA/images/92066.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are crossing the street?" }, { "from": "gpt", "value": "The answer is There are nine people crossing the street" } ], "clip_score": 19.421875, "loss": 2.780104637145996 }, { "id": "identity_227551", "image": "DocVQA/images/nywf0055_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total duration of the transmission in seconds?" }, { "from": "gpt", "value": "The answer is 363" } ], "clip_score": 22.59375, "loss": 2.642483949661255 }, { "id": "identity_102034", "image": "IconQA/images/31371.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hexagons have sides that are all the same length?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 37.125, "loss": 2.3541553020477295 }, { "id": "identity_250561", "image": "ScienceQA/images/320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the map and name its capital" }, { "from": "gpt", "value": "The answer is Des Moines" } ], "clip_score": 30.546875, "loss": 2.5676681995391846 }, { "id": "identity_70285", "image": "AI2D/images/1232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction does the yellow arrow between winter and spring point towards?" }, { "from": "gpt", "value": "The answer is East" } ], "clip_score": 29.421875, "loss": 3.4571304321289062 }, { "id": "identity_310458", "image": "Geometry3K/images/606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of \\angle 7 given that A B \\perp B C.\nChoices:\n(A) 110\n(B) 120\n(C) 130\n(D) 140" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 32.78125, "loss": 2.628420352935791 }, { "id": "identity_203392", "image": "A-OKVQA/images/124609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any person standing on the ground without a horse?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 21.921875, "loss": 3.203778028488159 }, { "id": "identity_307116", "image": "PlotQA/images/51714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the 1st bar from the top in this region represent?" }, { "from": "gpt", "value": "The answer is 1982" } ], "clip_score": 17.484375, "loss": 2.644385814666748 }, { "id": "identity_220368", "image": "PMC-VQA/images/PMC4663278_F1_448653.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show the entire brain or only a portion of it?" }, { "from": "gpt", "value": "The answer is Only a portion" } ], "clip_score": 30.828125, "loss": 3.300816059112549 }, { "id": "identity_122591", "image": "A-OKVQA/images/268725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible logos or brand names are there on the rider's suit?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 25.390625, "loss": 4.526073455810547 }, { "id": "identity_162785", "image": "GeoQA+/images/3150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, if ∠C = 90°, AD bisects ∠BAC, DE ⊥ AB at E, DE = 3, and ∠B = 30°, what is the length of AC?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.03125, "loss": 2.568361520767212 }, { "id": "identity_94429", "image": "TabMWP/images/15056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of buying one of each item on the menu?" }, { "from": "gpt", "value": "The answer is $17.77" } ], "clip_score": 29.265625, "loss": 2.1050901412963867 }, { "id": "identity_258940", "image": "ScienceQA/images/20686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of Rhode Island and then list the capitals of all the states that border Rhode Island" }, { "from": "gpt", "value": "The answer is Providence, Hartford, Boston" } ], "clip_score": 27.203125, "loss": 2.9198949337005615 }, { "id": "identity_76005", "image": "TQA/images/abc_question_images/parts_telescope_17287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the part labeled J?" }, { "from": "gpt", "value": "The answer is To focus light" } ], "clip_score": 27.375, "loss": 3.6445159912109375 }, { "id": "identity_320699", "image": "VQA2.0/images/301671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is anyone on this?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.21875, "loss": 3.2143733501434326 }, { "id": "identity_118438", "image": "VizWiz/images/VizWiz_train_00008105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main ingredient in this product?" }, { "from": "gpt", "value": "The answer is Tomatoes" } ], "clip_score": 28.203125, "loss": 3.9154324531555176 }, { "id": "identity_264143", "image": "FigureQA/images/75147.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange the curve with the largest area?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.0625, "loss": 4.583253860473633 }, { "id": "identity_1788", "image": "IconQA/images/11215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a star. Which picture is fourth?\nChoices:\n(A) star\n(B) rocket\n(C) moon" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.375, "loss": 4.10022497177124 }, { "id": "identity_67259", "image": "TQA/images/abc_question_images/ocean_waves_17120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the direction of wave propagation?" }, { "from": "gpt", "value": "The answer is T" } ], "clip_score": 24.28125, "loss": 2.9248814582824707 }, { "id": "identity_16475", "image": "A-OKVQA/images/507551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the bat resting on his shoulder?\nChoices:\n(A) hiding bat\n(B) hit ball\n(C) resting\n(D) stealing bat" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.046875, "loss": 4.159934997558594 }, { "id": "identity_334176", "image": "VQA2.0/images/527364.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is it daytime or nighttime?" }, { "from": "gpt", "value": "The answer is day" } ], "clip_score": 11.484375, "loss": 3.9097564220428467 }, { "id": "identity_194668", "image": "A-OKVQA/images/196811.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible traffic signs in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.03125, "loss": 2.9884896278381348 }, { "id": "identity_209543", "image": "A-OKVQA/images/497610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing helmets?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 17.734375, "loss": 2.5930888652801514 }, { "id": "identity_114636", "image": "TextVQA/images/8008f1ba5fab9e76.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of watch is advertised in the image based on its features?" }, { "from": "gpt", "value": "The answer is A luxury sports watch" } ], "clip_score": 29.671875, "loss": 3.4060986042022705 }, { "id": "identity_144540", "image": "PlotQA/images/59700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the proportion of economically active male population between Belize and Ireland in 2012?" }, { "from": "gpt", "value": "The answer is Approximately 5%" } ], "clip_score": 29.5625, "loss": 2.3654096126556396 }, { "id": "identity_218298", "image": "PMC-VQA/images/PMC8072877_diagnostics-11-00724-f004_427469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What part of the body is shown in this image?" }, { "from": "gpt", "value": "The answer is The eye, specifically the retina" } ], "clip_score": 35.5, "loss": 2.683351993560791 }, { "id": "identity_13581", "image": "TabMWP/images/5407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: A stock broker followed the stock prices of a certain set of companies. Which company's stock cost more on Wednesday, Houston Sporting Goods or Watson and Partners?\nChoices:\n(A) Houston Sporting Goods\n(B) Watson and Partners" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.703125, "loss": 3.9989402294158936 }, { "id": "identity_93724", "image": "TabMWP/images/34868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of fire hydrants on Oak Street and Elm Street combined?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 41.0, "loss": 1.9613511562347412 }, { "id": "identity_284321", "image": "TabMWP/images/3024.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Examine the table provided. Does this relation qualify as a function?\nOptions:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.625, "loss": 3.151965618133545 }, { "id": "identity_172474", "image": "TabMWP/images/29453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of coins in the collections?" }, { "from": "gpt", "value": "The answer is 83" } ], "clip_score": 33.875, "loss": 2.341712713241577 }, { "id": "identity_291308", "image": "VQA-AS/images/6626.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is she attempting to grab a book?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.0625, "loss": 3.5633461475372314 }, { "id": "identity_320421", "image": "PMC-VQA/images/PMC5240427_Fig3_115040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body shows intense uptake of FDG in this image?\nChoices:\n(A) Liver\n(B) Lungs\n(C) Heart\n(D) Kidneys." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.9375, "loss": 4.639955043792725 }, { "id": "identity_260460", "image": "PMC-VQA/images/PMC7262938_tca13421-fig-0001_181564.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the specific anatomical structures labeled in the image and explain their significance in the context of a right upper lobectomy" }, { "from": "gpt", "value": "The answer is A1+3 stump, V1-3 stump, A2 stump, B1-3 stump, SVC, Azygos vein" } ], "clip_score": 32.625, "loss": 1.1982969045639038 }, { "id": "identity_292131", "image": "VQA2.0/images/540554.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of tracks?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 13.6953125, "loss": 2.2926933765411377 }, { "id": "identity_170163", "image": "TabMWP/images/13394.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Kayla removes all the gold coins, how many coins will remain in the jar?" }, { "from": "gpt", "value": "The answer is 27" } ], "clip_score": 29.125, "loss": 2.7059319019317627 }, { "id": "identity_61039", "image": "DVQA/images/bar_train_00013148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the 'staff' value for 'cost' greater than the 'staff' value for 'cap'?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.203125, "loss": 3.3084824085235596 }, { "id": "identity_205171", "image": "VQA2.0/images/63875.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wooden posts are visible in the fence?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 21.765625, "loss": 4.609109401702881 }, { "id": "identity_272772", "image": "TQA/images/question_images/nuclear_energy_8104.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At the third generation, how many nuclei are there?\nChoices:\n(A) 3\n(B) 2\n(C) 4\n(D) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.203125, "loss": 2.762315273284912 }, { "id": "identity_172189", "image": "TabMWP/images/29173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a price of $450, is there a shortage or a surplus?" }, { "from": "gpt", "value": "The answer is Shortage" } ], "clip_score": 26.0, "loss": 2.6165475845336914 }, { "id": "identity_309989", "image": "DocVQA/images/gnnp0000_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of this page?" }, { "from": "gpt", "value": "The answer is WINSTON NO BULL LAUNCH AGENDA" } ], "clip_score": 22.21875, "loss": 1.7187954187393188 }, { "id": "identity_153713", "image": "ScienceQA/images/11473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the magnetic poles in Pair 2?" }, { "from": "gpt", "value": "The answer is Opposite poles facing each other" } ], "clip_score": 23.96875, "loss": 2.3060410022735596 }, { "id": "identity_8429", "image": "MapQA/images/map_6522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in the South ?" }, { "from": "gpt", "value": "The answer is 1135750-1350476" } ], "clip_score": 22.25, "loss": 2.4758269786834717 }, { "id": "identity_196095", "image": "A-OKVQA/images/539317.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any land visible in the background?" }, { "from": "gpt", "value": "The answer is Yes, there is land with buildings visible" } ], "clip_score": 16.203125, "loss": 2.6734132766723633 }, { "id": "identity_218553", "image": "TextVQA/images/1ee6fb55aa254fb5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of admission for the event?" }, { "from": "gpt", "value": "The answer is 5 euros" } ], "clip_score": 17.15625, "loss": 1.969772458076477 }, { "id": "identity_55271", "image": "DocVQA/images/rlnf0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of J.B. Cordaro in the OTA Food Program Staff?" }, { "from": "gpt", "value": "The answer is Food Program Manager" } ], "clip_score": 29.703125, "loss": 2.4407689571380615 }, { "id": "identity_217621", "image": "TextVQA/images/b5e8e0f751103b8e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the pom-poms being used by the cheerleaders?" }, { "from": "gpt", "value": "The answer is Yellow and blue" } ], "clip_score": 29.953125, "loss": 2.623167037963867 }, { "id": "identity_85793", "image": "Geometry3K/images/1690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the diagram?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 29.46875, "loss": 2.646040678024292 }, { "id": "identity_36963", "image": "ScienceQA/images/943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Neil's experiment can best answer.\nChoices:\n(A) Can pennies hold more drops of water mixed with dish soap or water mixed with hand soap?\n(B) Can pennies hold more drops of pure water or water mixed with hand soap?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.96875, "loss": 3.8445446491241455 }, { "id": "identity_304266", "image": "ChartQA/images/multi_col_40268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many points did the green line move from Q1 to Q3 in 2016?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 24.453125, "loss": 2.463874340057373 }, { "id": "identity_4608", "image": "GeoQA+/images/3443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图1,在⊙O中,若点C是⁀{AB}中点,∠OAB=50°,则∠BOC的度数为()\nChoices:\n(A) 40°\n(B) 45°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.703125, "loss": 2.622156858444214 }, { "id": "identity_44311", "image": "FigureQA/images/17087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Tomato category represent less than 25% of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 34.5, "loss": 2.868183135986328 }, { "id": "identity_187928", "image": "PMC-VQA/images/PMC5438315_Fig9_166518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the CT imaging technique used to obtain this image?" }, { "from": "gpt", "value": "The answer is Contrast-enhanced CT scan" } ], "clip_score": 30.578125, "loss": 2.043879508972168 }, { "id": "identity_166380", "image": "TabMWP/images/22087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much did the per-student arts education budget decrease from its highest point to its lowest point according to the table?" }, { "from": "gpt", "value": "The answer is $10" } ], "clip_score": 33.46875, "loss": 2.24011492729187 }, { "id": "identity_143306", "image": "DocVQA/images/tqjp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person who signed the letter?" }, { "from": "gpt", "value": "The answer is William J. Darby, M.D., Ph.D" } ], "clip_score": 23.578125, "loss": 1.8275055885314941 }, { "id": "identity_252466", "image": "Super-CLEVR/images/superCLEVR_new_000468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined color of the airliner and the road bike that are the same size as the metallic road bike?" }, { "from": "gpt", "value": "The answer is cyan and blue" } ], "clip_score": 27.296875, "loss": 3.6272318363189697 }, { "id": "identity_104434", "image": "PMC-VQA/images/PMC5331247_ccr3798-fig-0001_133177.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the abnormality located in the anterior or posterior part of the brain?" }, { "from": "gpt", "value": "The answer is Anterior part of the brain" } ], "clip_score": 29.5625, "loss": 2.0401363372802734 }, { "id": "identity_90416", "image": "Geometry3K/images/1031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ABCD is a kite, find the measure of angle BAD" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 34.90625, "loss": 2.165208101272583 }, { "id": "identity_166574", "image": "TabMWP/images/18362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which high school has the least number of math teachers?" }, { "from": "gpt", "value": "The answer is Northside High" } ], "clip_score": 31.578125, "loss": 2.4353535175323486 }, { "id": "identity_92756", "image": "TabMWP/images/35288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the respondents preferred stretching?" }, { "from": "gpt", "value": "The answer is 29.09%" } ], "clip_score": 21.703125, "loss": 2.0265674591064453 }, { "id": "identity_242715", "image": "DocVQA/images/mqyg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full title and publication date of the document?" }, { "from": "gpt", "value": "The answer is BULLETIN OF THE NATIONAL RESEARCH COUNCIL, JULY 1950" } ], "clip_score": 29.875, "loss": 1.221527338027954 }, { "id": "identity_236749", "image": "TextVQA/images/f9d18252cce3fdd0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the person associated with this plane, and what is the website URL mentioned on the plane?" }, { "from": "gpt", "value": "The answer is Seppo Pekkalä, www.pelitalo-pate.com/multiflyer" } ], "clip_score": 27.5625, "loss": 4.347412109375 }, { "id": "identity_89817", "image": "UniGeo/images/1423.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diameter AB of circle O?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 34.9375, "loss": 2.0907890796661377 }, { "id": "identity_100985", "image": "CLEVR-Math/images/CLEVR_train_021994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects with a metallic texture?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.828125, "loss": 2.1840410232543945 }, { "id": "identity_206021", "image": "PMC-VQA/images/PMC7527384_Fig9_263871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter 'P' indicate in this image?" }, { "from": "gpt", "value": "The answer is Prostate or a medical term starting with 'P'" } ], "clip_score": 25.21875, "loss": 3.235236883163452 }, { "id": "identity_111941", "image": "Super-CLEVR/images/superCLEVR_new_000675.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is directly in front of the purple bicycle?" }, { "from": "gpt", "value": "The answer is A car" } ], "clip_score": 23.8125, "loss": 4.514457702636719 }, { "id": "identity_314464", "image": "FigureQA/images/56475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Crimson less than this?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.796875, "loss": 3.8294739723205566 }, { "id": "identity_19801", "image": "TextVQA/images/d06f04ba098c1f52.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what logo is displayed on the field?" }, { "from": "gpt", "value": "The answer is ny" } ], "clip_score": 22.46875, "loss": 3.0344226360321045 }, { "id": "identity_245109", "image": "VQA-AS/images/4877.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are likely to be present based on the items on the table and the seating arrangement?" }, { "from": "gpt", "value": "The answer is two" } ], "clip_score": 22.46875, "loss": 3.6663565635681152 }, { "id": "identity_17933", "image": "UniGeo/images/867.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,C,D是⊙O上两点,若∠D=35°,则∠OCB的度数是()\nChoices:\n(A) 35°\n(B) 55°\n(C) 65°\n(D) 70°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 2.7355589866638184 }, { "id": "identity_61819", "image": "DocVQA/images/rfhm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of document is shown in the image?" }, { "from": "gpt", "value": "The answer is AN OCCASIONAL PAPER" } ], "clip_score": 16.3125, "loss": 1.9211466312408447 }, { "id": "identity_258080", "image": "IconQA/images/17876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many edges does this shape have, and what is the total length of all the edges if each edge is 3 units long?" }, { "from": "gpt", "value": "The answer is 36 units" } ], "clip_score": 29.953125, "loss": 3.0497677326202393 }, { "id": "identity_94561", "image": "IconQA/images/99042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two more blue marbles are added, what is the new ratio of black marbles to blue marbles?" }, { "from": "gpt", "value": "The answer is 2:3" } ], "clip_score": 33.46875, "loss": 1.7134023904800415 }, { "id": "identity_323860", "image": "A-OKVQA/images/573973.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the oval object connected to these keys?\nChoices:\n(A) carabiner\n(B) ubolt\n(C) key chain\n(D) knife" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 8.9609375, "loss": 4.4694414138793945 }, { "id": "identity_247242", "image": "DVQA/images/bar_train_00112504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the 'price' bars across all datasets?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.703125, "loss": 2.3879687786102295 }, { "id": "identity_160348", "image": "GeoQA+/images/1387.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知A,B,C为⊙O上三点,若∠AOB=80°,则弧AC的度数为多少?" }, { "from": "gpt", "value": "The answer is 弧AC = 360° - 160° = 200°" } ], "clip_score": 31.890625, "loss": 1.3722624778747559 }, { "id": "identity_149568", "image": "DocVQA/images/yjcd0227_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the trend in calorie requirement per kilogram as the weight of mammals increases?" }, { "from": "gpt", "value": "The answer is Decreasing trend" } ], "clip_score": 18.328125, "loss": 3.138049602508545 }, { "id": "identity_151429", "image": "ScienceQA/images/3271.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 25.578125, "loss": 2.8730692863464355 }, { "id": "identity_92372", "image": "IconQA/images/90346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the new orientation of the letter G after the transformation?" }, { "from": "gpt", "value": "The answer is The letter G is now lying on its back" } ], "clip_score": 27.28125, "loss": 2.9738590717315674 }, { "id": "identity_194090", "image": "TextVQA/images/e6b1ae1ea0e0c099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the jersey of the player to the left in the image?" }, { "from": "gpt", "value": "The answer is 41" } ], "clip_score": 17.09375, "loss": 2.655247211456299 }, { "id": "identity_31108", "image": "A-OKVQA/images/54337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the men taste testing?\nChoices:\n(A) milk\n(B) water\n(C) juice\n(D) wine" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.71875, "loss": 4.003086566925049 }, { "id": "identity_178975", "image": "TabMWP/images/10200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more classmate is added to the list and they read 5 science articles, what will the new average be?" }, { "from": "gpt", "value": "The answer is 5.625" } ], "clip_score": 37.75, "loss": 2.089808225631714 }, { "id": "identity_106260", "image": "TextVQA/images/a3012301f2e49274.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beverage is contained in the bottle?" }, { "from": "gpt", "value": "The answer is Schnapps" } ], "clip_score": 24.171875, "loss": 2.955789804458618 }, { "id": "identity_336327", "image": "UniGeo/images/1557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,DE∥BC,DE分别与AB、AC相交于点D、E,若AE=4,EC=2,则BD:AB的值为多少?\nChoices:\n(A) 2\n(B) \\frac{1}{2}\n(C) \\frac{1}{3}\n(D) \\frac{2}{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.3125, "loss": 2.6604952812194824 }, { "id": "identity_58105", "image": "MapQA/images/map_8521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states have a value range of 41.8%-44.3%?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.515625, "loss": 2.5254499912261963 }, { "id": "identity_168963", "image": "TabMWP/images/16686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a customer wants to buy an equal amount of vanilla and butter pecan ice cream totaling 3 pounds, how much will it cost?" }, { "from": "gpt", "value": "The answer is $12" } ], "clip_score": 37.0625, "loss": 2.1350557804107666 }, { "id": "identity_325807", "image": "DocVQA/images/nsyg0024_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of help is IBM providing?" }, { "from": "gpt", "value": "The answer is stress testing the application" } ], "clip_score": 28.375, "loss": 4.78336238861084 }, { "id": "identity_115798", "image": "A-OKVQA/images/531146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible wheels does the bus have?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.640625, "loss": 2.2008891105651855 }, { "id": "identity_64598", "image": "ScienceQA/images/19125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How would the magnetic field lines look between these two magnets?" }, { "from": "gpt", "value": "The answer is Continuous loops from N to S between magnets" } ], "clip_score": 23.640625, "loss": 3.192931890487671 }, { "id": "identity_200504", "image": "Super-CLEVR/images/superCLEVR_new_000323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object located between the purple metallic sedan and the big yellow matte bicycle?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 32.40625, "loss": 3.1902921199798584 }, { "id": "identity_59447", "image": "ChartQA/images/multi_col_222.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of engagement in sports and leisure activities for all categories over the 10-year span?" }, { "from": "gpt", "value": "The answer is 95.77%" } ], "clip_score": 26.0625, "loss": 2.0722298622131348 }, { "id": "identity_293493", "image": "TextVQA/images/befc1327ce32a93f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which word appears first in the background's top banner?" }, { "from": "gpt", "value": "The answer is sports" } ], "clip_score": 19.640625, "loss": 3.139277696609497 }, { "id": "identity_249833", "image": "PMC-VQA/images/PMC9427313_fig2_389351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are indicated by the arrows in the POCUS image, and how do they relate spatially to the left portal branch indicated by the asterisk symbol?" }, { "from": "gpt", "value": "The answer is Hepatic veins, anterior to the left portal branch" } ], "clip_score": 27.140625, "loss": 2.829857110977173 }, { "id": "identity_235856", "image": "FigureQA/images/36621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the largest and smallest segments?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 21.859375, "loss": 1.8000867366790771 }, { "id": "identity_216489", "image": "TextVQA/images/038c8f14559fa56c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the truck's body excluding the text and images?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.53125, "loss": 3.5121490955352783 }, { "id": "identity_229321", "image": "ScienceQA/images/13384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean is highlighted, and which continents does it border?" }, { "from": "gpt", "value": "The answer is Antarctica, South America, Africa, Australia" } ], "clip_score": 33.90625, "loss": 2.1154797077178955 }, { "id": "identity_219098", "image": "PMC-VQA/images/PMC9029390_fig-13_261341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of MRI was used in the scan?" }, { "from": "gpt", "value": "The answer is T2-weighted MRI scan" } ], "clip_score": 29.640625, "loss": 1.7871289253234863 }, { "id": "identity_126996", "image": "VQA2.0/images/463533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the team of the pitcher?" }, { "from": "gpt", "value": "The answer is Los Angeles Dodgers" } ], "clip_score": 21.921875, "loss": 2.7264442443847656 }, { "id": "identity_313906", "image": "TextVQA/images/b47059b93cc7e74d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name on this?" }, { "from": "gpt", "value": "The answer is cognitive surplus; creativity and generosity in a connected age; by clay shirky" } ], "clip_score": 42.4375, "loss": 1.4445114135742188 }, { "id": "identity_54014", "image": "MapQA/images/map_4994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does California have a higher value range than Nevada?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.71875, "loss": 3.0675570964813232 }, { "id": "identity_215133", "image": "VQA2.0/images/391219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the canopy of the food truck on the far left?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 27.3125, "loss": 3.4075751304626465 }, { "id": "identity_153610", "image": "ScienceQA/images/1845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After reaching equilibrium, how many solute particles are on each side of the membrane?" }, { "from": "gpt", "value": "The answer is 4 particles on each side" } ], "clip_score": 29.5625, "loss": 2.0382237434387207 }, { "id": "identity_297632", "image": "TextVQA/images/708ba2e146e19132.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of beer is in the green bottle?" }, { "from": "gpt", "value": "The answer is mark twain" } ], "clip_score": 24.765625, "loss": 4.414865016937256 }, { "id": "identity_247888", "image": "UniGeo/images/2489.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that point A is on circle O, ∠OCB = 40°, and ∠AOB = 2∠ACB, find the measure of ∠A" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 33.65625, "loss": 1.809473991394043 }, { "id": "identity_262983", "image": "PMC-VQA/images/PMC4896362_F1_38630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining method was utilized to create the blue color in the image? Is it:\n(A) Safranin-O\n(B) Crystal violet\n(C) Gram stain\n(D) Calcofluor white" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.875, "loss": 4.35259485244751 }, { "id": "identity_19519", "image": "TabMWP/images/24001.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Lauren need to buy a bottle of perfume and a bottle of mouthwash?" }, { "from": "gpt", "value": "The answer is 9.73" } ], "clip_score": 24.96875, "loss": 2.291741132736206 }, { "id": "identity_314981", "image": "GeoQA+/images/683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD内接于⊙O,AB=AD,连接BD,若∠C=120°,AB=2,则△ABD的周长是多少?\nChoices:\n(A) 3\n(B) 4\n(C) 6\n(D) 8" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.34375, "loss": 2.3540005683898926 }, { "id": "identity_117561", "image": "A-OKVQA/images/260901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many railway tracks can be seen in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.46875, "loss": 2.1310722827911377 }, { "id": "identity_83826", "image": "Geometry3K/images/1415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of angle A" }, { "from": "gpt", "value": "The answer is 82 degrees" } ], "clip_score": 31.421875, "loss": 2.638812780380249 }, { "id": "identity_30880", "image": "A-OKVQA/images/331214.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which conveyance pictured here uses less gas?\nChoices:\n(A) truck\n(B) van\n(C) buggy\n(D) car" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.046875, "loss": 4.082180976867676 }, { "id": "identity_320118", "image": "DocVQA/images/rsxw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is this meeting organized?" }, { "from": "gpt", "value": "The answer is VNA LIBRARY" } ], "clip_score": 20.484375, "loss": 4.146758556365967 }, { "id": "identity_179674", "image": "TabMWP/images/1552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of miles run by the team members?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.671875, "loss": 2.445478677749634 }, { "id": "identity_306363", "image": "ChartQA/images/OECD_ENROLMENT_RATE_IN_EARLY_CHILDHOOD_EDUCATION_BRA_RUS_000014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is shown by the blue line?" }, { "from": "gpt", "value": "The answer is Brazil" } ], "clip_score": 14.96875, "loss": 3.2349071502685547 }, { "id": "identity_171250", "image": "IconQA/images/15772.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of triangles in the first row to the second row?" }, { "from": "gpt", "value": "The answer is 5:4" } ], "clip_score": 28.609375, "loss": 1.9974311590194702 }, { "id": "identity_185798", "image": "A-OKVQA/images/541493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many branches are in the vase?" }, { "from": "gpt", "value": "The answer is At least five" } ], "clip_score": 14.5703125, "loss": 3.95402455329895 }, { "id": "identity_163799", "image": "UniGeo/images/281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the perimeter of square b is 20, what is the side length of square b?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 31.8125, "loss": 2.177434206008911 }, { "id": "identity_156536", "image": "AI2D/images/3795.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What stage of the plant's life cycle is represented by label A?" }, { "from": "gpt", "value": "The answer is Flowering stage" } ], "clip_score": 35.0625, "loss": 3.1087536811828613 }, { "id": "identity_227190", "image": "AI2D/images/2044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the population of Lariosaurus increases, what will be the impact on the population of Saurosphargids, considering the entire food web?" }, { "from": "gpt", "value": "The answer is increase" } ], "clip_score": 23.765625, "loss": 5.360037803649902 }, { "id": "identity_124492", "image": "VQA2.0/images/303140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the texture of the ground the elephant is standing on?" }, { "from": "gpt", "value": "The answer is Dry and sandy" } ], "clip_score": 29.625, "loss": 2.6553471088409424 }, { "id": "identity_206203", "image": "TextVQA/images/38f74e47768c8b9b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the channel broadcasting the game?" }, { "from": "gpt", "value": "The answer is Rai 4" } ], "clip_score": 18.328125, "loss": 2.8432064056396484 }, { "id": "identity_247502", "image": "PMC-VQA/images/PMC8376391_F2_37658.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key differences between a high-resolution MRI and a CT scan in terms of imaging arterial walls, and why is high-resolution MRI preferred for this purpose?" }, { "from": "gpt", "value": "The answer is High-resolution MRI is preferred for detailed arterial wall imaging" } ], "clip_score": 29.015625, "loss": 2.333040952682495 }, { "id": "identity_230878", "image": "ScienceQA/images/9569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Troy and Johnny each have one item that the other wants, and they can trade only once, what is the most efficient trade they can make to maximize the number of items each person gets that they want?" }, { "from": "gpt", "value": "The answer is Troy trades tomatoes for Johnny's broccoli" } ], "clip_score": 23.953125, "loss": 2.120774984359741 }, { "id": "identity_197794", "image": "VQA2.0/images/490518.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What structure is present to the right of the Clock Tower?" }, { "from": "gpt", "value": "The answer is Band Shell" } ], "clip_score": 29.90625, "loss": 2.8891754150390625 }, { "id": "identity_225481", "image": "Geometry3K/images/1834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and \\( y \\) and then determine \\( m \\angle J \\)" }, { "from": "gpt", "value": "The answer is \\( 106^\\circ \\)" } ], "clip_score": 30.765625, "loss": 3.1708314418792725 }, { "id": "identity_275785", "image": "DVQA/images/bar_train_00107656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do fewer people prefer the object view in the category panel compared to the object cellar in the category farm?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 30.828125, "loss": 3.349264144897461 }, { "id": "identity_165015", "image": "GeoQA+/images/2034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, with points D and E on segments AB and BC respectively, and point F on segment AC, if BD = DE and EF = FC, what is the measure of angle BDE?" }, { "from": "gpt", "value": "The answer is 120 degrees" } ], "clip_score": 33.125, "loss": 1.5279557704925537 }, { "id": "identity_154953", "image": "AI2D/images/836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the life cycle diagram, which stage is represented by multiple individuals grouped together?" }, { "from": "gpt", "value": "The answer is Eggs" } ], "clip_score": 27.75, "loss": 1.8407642841339111 }, { "id": "identity_191629", "image": "A-OKVQA/images/412247.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the player's top?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 20.453125, "loss": 2.881211042404175 }, { "id": "identity_139929", "image": "DocVQA/images/xnnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hours for the statistics courses taken?" }, { "from": "gpt", "value": "The answer is 21" } ], "clip_score": 15.21875, "loss": 2.373347759246826 }, { "id": "identity_24404", "image": "TextVQA/images/177482ed108062de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what's the brand of the orange bottle?" }, { "from": "gpt", "value": "The answer is brut" } ], "clip_score": 26.890625, "loss": 4.7624406814575195 }, { "id": "identity_125799", "image": "VQA2.0/images/481125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of recording device is placed on the right side of the image?" }, { "from": "gpt", "value": "The answer is Reel-to-reel tape recorder" } ], "clip_score": 22.421875, "loss": 1.0821048021316528 }, { "id": "identity_189497", "image": "A-OKVQA/images/175933.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the skateboarder wearing any protective gear?" }, { "from": "gpt", "value": "The answer is Yes, a helmet" } ], "clip_score": 24.453125, "loss": 3.0907130241394043 }, { "id": "identity_189685", "image": "TextVQA/images/54e98403b5b995fb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the video game series depicted in the image?" }, { "from": "gpt", "value": "The answer is Star Wars" } ], "clip_score": 25.375, "loss": 2.678591728210449 }, { "id": "identity_118793", "image": "TextVQA/images/882dce3f2ea4f1df.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number of the right page?" }, { "from": "gpt", "value": "The answer is 333" } ], "clip_score": 23.921875, "loss": 2.3613290786743164 }, { "id": "identity_260164", "image": "PMC-VQA/images/PMC7482117_f2_251609.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics in the image that indicate it is a Magnetic Resonance Imaging (MRI) scan, and what specific brain structure is being pointed to by the arrow?" }, { "from": "gpt", "value": "The answer is MRI scan, lesion in the brain" } ], "clip_score": 35.53125, "loss": 3.476780891418457 }, { "id": "identity_100493", "image": "TabMWP/images/34458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more expensive is the bulldozer compared to the hydrofoil?" }, { "from": "gpt", "value": "The answer is $1,834.00" } ], "clip_score": 29.53125, "loss": 1.9719569683074951 }, { "id": "identity_291882", "image": "ScienceQA/images/16373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states listed, which one is the southernmost? (A) Connecticut, (B) Alabama, (C) Pennsylvania, or (D) Kentucky." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.9375, "loss": 3.930638074874878 }, { "id": "identity_324374", "image": "GeoQA+/images/2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在一个平行四边形中,对角线的垂直平分线分别交两条边于两个点,连接其中一个点和一个顶点。若某个三角形的周长为3,则这个平行四边形的周长为()\nChoices:\n(A) 15\n(B) 12\n(C) 9\n(D) 6" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.75, "loss": 2.2828421592712402 }, { "id": "identity_115963", "image": "TextVQA/images/00ee02cc29bf07e2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the sign with a tree and flame symbol indicate?" }, { "from": "gpt", "value": "The answer is Flammable solid" } ], "clip_score": 15.7109375, "loss": 4.360270977020264 }, { "id": "identity_106041", "image": "A-OKVQA/images/406205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the lights on the trees?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.28125, "loss": 3.5169837474823 }, { "id": "identity_182480", "image": "PMC-VQA/images/PMC8433138_Fig3_56928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate diameter of the circular object in the image?" }, { "from": "gpt", "value": "The answer is Approximately 1500 micrometers" } ], "clip_score": 18.359375, "loss": 1.9664233922958374 }, { "id": "identity_66373", "image": "ScienceQA/images/2477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.5, "loss": 2.5875234603881836 }, { "id": "identity_77495", "image": "ScienceQA/images/16569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the decomposer in this food web" }, { "from": "gpt", "value": "The answer is Mushroom" } ], "clip_score": 28.9375, "loss": 2.6731908321380615 }, { "id": "identity_73301", "image": "AI2D/images/1928.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trophic levels are present in this food web?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 32.71875, "loss": 4.4025654792785645 }, { "id": "identity_135654", "image": "FigureQA/images/93701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are there for Dark Khaki?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 24.140625, "loss": 2.123462438583374 }, { "id": "identity_173272", "image": "IconQA/images/20315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two marbles are removed at random, what is the probability that both are purple?" }, { "from": "gpt", "value": "The answer is 4/5 * 3/4 = 0.6" } ], "clip_score": 32.0, "loss": 1.752084732055664 }, { "id": "identity_48865", "image": "ChartQA/images/multi_col_60710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average of post-lockdown 2020 sales in billions GBP for the countries shown in the graph?" }, { "from": "gpt", "value": "The answer is 38.27" } ], "clip_score": 27.5625, "loss": 2.0477468967437744 }, { "id": "identity_232511", "image": "UniGeo/images/2569.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 在菱形ABCD中,AC=2,BD=5,点P是对角线AC上任意一点,过点P作PE∥AD,PF∥AB,交AB、AD分别为E、F,求图中阴影部分面积与菱形ABCD面积之比。" }, { "from": "gpt", "value": "The answer is \\(\\frac{1}{2}\\)" } ], "clip_score": 26.484375, "loss": 2.2228450775146484 }, { "id": "identity_256867", "image": "VQA2.0/images/406710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many different types of baked goods are visible on the table?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 22.828125, "loss": 1.8832963705062866 }, { "id": "identity_279581", "image": "TabMWP/images/6302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries have a total of 41 athletes competing in an international sporting event, as researched by Stacy for her P.E. class project?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 25.6875, "loss": 2.237351894378662 }, { "id": "identity_193270", "image": "TextVQA/images/5e567720c4753203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hot air balloons are visible in the sky?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.546875, "loss": 2.391674757003784 }, { "id": "identity_24163", "image": "MapQA/images/map_5700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does New Jersey have the highest value in the USA ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.6875, "loss": 2.6119465827941895 }, { "id": "identity_98842", "image": "IconQA/images/93216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents a piece of wood, how many pieces of the same length can be cut from a 1-meter long wood?" }, { "from": "gpt", "value": "The answer is 25 pieces" } ], "clip_score": 36.96875, "loss": 2.430215358734131 }, { "id": "identity_306130", "image": "DocVQA/images/jfgn0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which project is referenced in the approval?" }, { "from": "gpt", "value": "The answer is Pinnacle Acetabular Cup Survival Results" } ], "clip_score": 20.8125, "loss": 3.6398427486419678 }, { "id": "identity_78735", "image": "AI2D/images/3265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label \"Golgi complex\" refer to in the cell diagram?" }, { "from": "gpt", "value": "The answer is It modifies, sorts, and packages proteins and lipids" } ], "clip_score": 22.40625, "loss": 2.3243136405944824 }, { "id": "identity_263745", "image": "VQA2.0/images/33431.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of individuals present in the image?" }, { "from": "gpt", "value": "The answer is many" } ], "clip_score": 20.0, "loss": 3.442814588546753 }, { "id": "identity_125599", "image": "PMC-VQA/images/PMC3407775_F1_147337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 33.0625, "loss": 2.824937105178833 }, { "id": "identity_5300", "image": "FigureQA/images/32681.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Green greater than Dark Blue?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 39.625, "loss": 4.062303066253662 }, { "id": "identity_215189", "image": "A-OKVQA/images/259099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person in the foreground engaging in?" }, { "from": "gpt", "value": "The answer is Snowboarding" } ], "clip_score": 28.125, "loss": 2.5115578174591064 }, { "id": "identity_311257", "image": "TQA/images/abc_question_images/earth_moon_phases_12531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label occurs when no light from the sun is being blocked by the Earth?\nChoices:\n(A) P\n(B) T\n(C) L\n(D) H" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.953125, "loss": 3.782536268234253 }, { "id": "identity_108694", "image": "VizWiz/images/VizWiz_train_00015553.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pocket on the shirt?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.125, "loss": 3.2649929523468018 }, { "id": "identity_203731", "image": "PMC-VQA/images/PMC6329071_Fig1_421479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was likely used to capture this image?" }, { "from": "gpt", "value": "The answer is Fluorescence microscopy" } ], "clip_score": 28.546875, "loss": 1.282693862915039 }, { "id": "identity_59791", "image": "DocVQA/images/fmmp0227_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the total cash and total securities received?" }, { "from": "gpt", "value": "The answer is $75,177.43" } ], "clip_score": 23.796875, "loss": 2.140367269515991 }, { "id": "identity_191442", "image": "PMC-VQA/images/PMC3328163_fig4_134168.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any signs of cerebral edema be observed in this CT scan?" }, { "from": "gpt", "value": "The answer is No signs of cerebral edema observed" } ], "clip_score": 34.53125, "loss": 2.114842653274536 }, { "id": "identity_308189", "image": "TabMWP/images/37094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many additional members are on the graduation committee compared to the field trip committee?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.734375, "loss": 2.4402005672454834 }, { "id": "identity_324187", "image": "TabMWP/images/5551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Maria is checking out the prices of new cell phones online. Her preferred company, OrangeTech, has a special offer this weekend. Maria can either add an upgrade to a phone for an extra cost or buy a used phone to get a discount. The price changes for each option are shown in the table. Which option results in a larger change in price?\nChoices:\n(A) adding an upgrade\n(B) buying a used phone" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.96875, "loss": 3.2611546516418457 }, { "id": "identity_319990", "image": "GeoQA+/images/4827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O是正六边形ABCDEF的外接圆,点P在⊙O上(P不与A,B重合),则∠APB的度数为多少?\nChoices:\n(A) 30°\n(B) 150°\n(C) 30°\n(D) 60°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.890625, "loss": 2.5216591358184814 }, { "id": "identity_12938", "image": "FigureQA/images/1214.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Tomato the minimum?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.609375, "loss": 3.9094696044921875 }, { "id": "identity_72946", "image": "TQA/images/abc_question_images/blastocyst_19025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label for the stage where the embryo is called a blastocyst?" }, { "from": "gpt", "value": "The answer is P" } ], "clip_score": 25.359375, "loss": 2.9112160205841064 }, { "id": "identity_273643", "image": "UniGeo/images/1647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, point A is located in the north-northeast direction from point B, and in the northwest direction from point C. What is the measure of angle BAC? \nChoices:\n(A) 30°\n(B) 45°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.4375, "loss": 2.581948757171631 }, { "id": "identity_232762", "image": "FigureQA/images/77833.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At which x-axis value does Light Seafoam first exceed Midnight Blue, and what are their corresponding y-axis values at that point?" }, { "from": "gpt", "value": "The answer is x = 40, Light Seafoam = 85, Midnight Blue = 80" } ], "clip_score": 28.734375, "loss": 1.4838937520980835 }, { "id": "identity_26277", "image": "A-OKVQA/images/302141.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What country is this location?\nChoices:\n(A) canada\n(B) spain\n(C) mexico\n(D) france" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.6875, "loss": 4.423878192901611 }, { "id": "identity_219155", "image": "VQA2.0/images/349268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all the individuals wearing gloves?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 16.046875, "loss": 3.1294827461242676 }, { "id": "identity_171530", "image": "TabMWP/images/24416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you buy one of each item, how much change will you get from $100?" }, { "from": "gpt", "value": "The answer is $21.00" } ], "clip_score": 24.671875, "loss": 1.8054497241973877 }, { "id": "identity_103373", "image": "A-OKVQA/images/208511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement on the scoreboard directly above the batter?" }, { "from": "gpt", "value": "The answer is ELITE SHORE" } ], "clip_score": 29.34375, "loss": 4.068368911743164 }, { "id": "identity_92800", "image": "IconQA/images/4296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the first van leaves and the rest move forward to fill the space, how many vans will be left?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 31.9375, "loss": 2.115410327911377 }, { "id": "identity_217753", "image": "A-OKVQA/images/427046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of equipment is advertised on the yellow umbrella?" }, { "from": "gpt", "value": "The answer is Desaxe" } ], "clip_score": 19.078125, "loss": 4.431777477264404 }, { "id": "identity_179974", "image": "TabMWP/images/37710.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does one need to buy 2 soccer tickets and 3 hockey tickets?" }, { "from": "gpt", "value": "The answer is $295.19" } ], "clip_score": 34.90625, "loss": 2.162637948989868 }, { "id": "identity_124903", "image": "PMC-VQA/images/PMC8624097_medicina-57-01276-f001_124169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the blood vessels in the retina?" }, { "from": "gpt", "value": "The answer is Normal, no apparent abnormalities" } ], "clip_score": 34.15625, "loss": 4.0885910987854 }, { "id": "identity_195111", "image": "PMC-VQA/images/PMC4455612_Fig1_393486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the slice number of this particular image in the series?" }, { "from": "gpt", "value": "The answer is 41" } ], "clip_score": 22.3125, "loss": 3.1716864109039307 }, { "id": "identity_197133", "image": "PMC-VQA/images/PMC7132119_fig0005_145183.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the lesion as indicated on the bottom right of the image?" }, { "from": "gpt", "value": "The answer is 2.5 cm" } ], "clip_score": 26.46875, "loss": 1.8391499519348145 }, { "id": "identity_203280", "image": "A-OKVQA/images/461248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the umbrellas seen in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.6875, "loss": 3.353473663330078 }, { "id": "identity_217599", "image": "A-OKVQA/images/65643.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible signage in a language other than English?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.484375, "loss": 2.906787157058716 }, { "id": "identity_126206", "image": "VQA-AS/images/8477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is shown in the image?" }, { "from": "gpt", "value": "The answer is Wooden" } ], "clip_score": 21.5, "loss": 3.1357038021087646 }, { "id": "identity_104862", "image": "A-OKVQA/images/245793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date displayed on the image?" }, { "from": "gpt", "value": "The answer is 19.06.2013" } ], "clip_score": 13.8046875, "loss": 1.4101115465164185 }, { "id": "identity_183184", "image": "Super-CLEVR/images/superCLEVR_new_000932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of yellow objects in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.40625, "loss": 2.0843915939331055 }, { "id": "identity_206619", "image": "TextVQA/images/0007b54189a67423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color of the book's cover background?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 17.078125, "loss": 3.8344948291778564 }, { "id": "identity_172867", "image": "IconQA/images/44845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability of picking a black marble, replacing it, and then picking a purple marble?" }, { "from": "gpt", "value": "The answer is 0.24" } ], "clip_score": 34.09375, "loss": 2.412364959716797 }, { "id": "identity_307694", "image": "ScienceQA/images/1074.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic do these items share?\nChoices:\n(A) salty\n(B) sweet\n(C) rough" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.515625, "loss": 4.792668342590332 }, { "id": "identity_10053", "image": "DocVQA/images/fshk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in top of the Page ?" }, { "from": "gpt", "value": "The answer is page two" } ], "clip_score": 17.765625, "loss": 3.778789520263672 }, { "id": "identity_142631", "image": "DocVQA/images/hmwn0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pages are dedicated to the section titled \"The INTERVIEW\"?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 23.578125, "loss": 2.293729543685913 }, { "id": "identity_44654", "image": "DocVQA/images/zfkf0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What will not be reimbursed when traveling within the Chicago metropolitan area?" }, { "from": "gpt", "value": "The answer is automobile travel" } ], "clip_score": 19.15625, "loss": 3.803013563156128 }, { "id": "identity_114279", "image": "VQA-RAD/images/synpic33226.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of rib fractures on the right side?" }, { "from": "gpt", "value": "The answer is No evidence of rib fractures" } ], "clip_score": 31.9375, "loss": 2.6075925827026367 }, { "id": "identity_5153", "image": "Super-CLEVR/images/superCLEVR_new_000753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other thing that has the same size as the articulated bus?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.515625, "loss": 5.153135776519775 }, { "id": "identity_202663", "image": "TextVQA/images/2ced02cf09270a86.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many Coronet R/Ts were built in 1969 according to the Chrysler production records?" }, { "from": "gpt", "value": "The answer is 7000" } ], "clip_score": 30.015625, "loss": 2.2313146591186523 }, { "id": "identity_78793", "image": "AI2D/images/1082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Ribosomes\"?" }, { "from": "gpt", "value": "The answer is Protein synthesis" } ], "clip_score": 25.671875, "loss": 2.059532642364502 }, { "id": "identity_145681", "image": "DocVQA/images/qzlf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the American Heart Association's National Center?" }, { "from": "gpt", "value": "The answer is 7320 Greenville Avenue, Dallas, Texas 75231" } ], "clip_score": 21.140625, "loss": 0.9222440719604492 }, { "id": "identity_112814", "image": "VQA2.0/images/383197.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many green apples can be seen on the table?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.234375, "loss": 4.995830535888672 }, { "id": "identity_146944", "image": "DocVQA/images/fgpx0078_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color t-shirts were the agents wearing?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 18.640625, "loss": 3.2502846717834473 }, { "id": "identity_74179", "image": "AI2D/images/463.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which animal in the image is known for its echolocation ability?" }, { "from": "gpt", "value": "The answer is Dolphin" } ], "clip_score": 23.421875, "loss": 1.976055383682251 }, { "id": "identity_127323", "image": "Super-CLEVR/images/superCLEVR_new_000436.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image that are the same color as the grass?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.984375, "loss": 3.347548723220825 }, { "id": "identity_223500", "image": "A-OKVQA/images/233119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the traffic light for the pedestrian crossing red or green?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 18.984375, "loss": 3.165219783782959 }, { "id": "identity_124736", "image": "A-OKVQA/images/111030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.125, "loss": 4.465523719787598 }, { "id": "identity_6126", "image": "VQA-AS/images/11938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the fire?" }, { "from": "gpt", "value": "The answer is fireplace" } ], "clip_score": 26.28125, "loss": 3.068530797958374 }, { "id": "identity_157200", "image": "ScienceQA/images/15914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest it is?" }, { "from": "gpt", "value": "The answer is Early morning or late afternoon" } ], "clip_score": 19.140625, "loss": 3.239142894744873 }, { "id": "identity_65441", "image": "TQA/images/question_images/optics_lense_types_9160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which lens would be most suitable for correcting nearsightedness?" }, { "from": "gpt", "value": "The answer is Double Concave Lens" } ], "clip_score": 34.125, "loss": 2.0367684364318848 }, { "id": "identity_238396", "image": "TQA/images/abc_question_images/parts_skeleton_17275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the bone labeled X and describe its anatomical significance and function" }, { "from": "gpt", "value": "The answer is Mandible, essential for chewing and speech" } ], "clip_score": 29.15625, "loss": 3.631223201751709 }, { "id": "identity_147146", "image": "DocVQA/images/trnf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page number of this letter?" }, { "from": "gpt", "value": "The answer is Page 2" } ], "clip_score": 14.3125, "loss": 2.5629255771636963 }, { "id": "identity_254259", "image": "IconQA/images/5834.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shape is not blue, and what is the sum of the numerators and denominators of the fractions representing the blue and non-blue parts?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.890625, "loss": 4.41391658782959 }, { "id": "identity_130247", "image": "A-OKVQA/images/574953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is parked in the background on the right?" }, { "from": "gpt", "value": "The answer is SUV" } ], "clip_score": 16.71875, "loss": 2.761791467666626 }, { "id": "identity_152992", "image": "AI2D/images/3459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the section labeled as 'Thorax'?" }, { "from": "gpt", "value": "The answer is It is the middle section of the body" } ], "clip_score": 22.609375, "loss": 2.7556705474853516 }, { "id": "identity_213454", "image": "A-OKVQA/images/50578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting in the image suggest?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 15.2109375, "loss": 2.7579572200775146 }, { "id": "identity_34691", "image": "IconQA/images/44180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) after nine." }, { "from": "gpt", "value": "The answer is quarter" } ], "clip_score": 26.28125, "loss": 5.158209800720215 }, { "id": "identity_325211", "image": "TextVQA/images/253ce025df6ab8c7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which city is mentioned on the poster?" }, { "from": "gpt", "value": "The answer is hamburg" } ], "clip_score": 27.15625, "loss": 3.5669918060302734 }, { "id": "identity_154619", "image": "ScienceQA/images/3003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many continents are fully visible in this image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 31.328125, "loss": 4.720196723937988 }, { "id": "identity_256957", "image": "PMC-VQA/images/PMC6678943_jcm-08-00996-f005_18468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific characteristics of the posterior white matter (WM) are visible in the MRI images, and how do they compare to the anterior white matter (WM) in terms of signal intensity and structural integrity?" }, { "from": "gpt", "value": "The answer is Posterior WM shows higher signal intensity and structural integrity differences" } ], "clip_score": 32.75, "loss": 3.107567071914673 }, { "id": "identity_151356", "image": "ScienceQA/images/7978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the geographical feature located to the west of the highlighted state?" }, { "from": "gpt", "value": "The answer is Chesapeake Bay" } ], "clip_score": 26.625, "loss": 2.840989351272583 }, { "id": "identity_113663", "image": "VizWiz/images/VizWiz_train_00001222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the character have eyebrows in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.84375, "loss": 3.3558101654052734 }, { "id": "identity_83634", "image": "GeoQA+/images/1823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the degree measure of the major arc ABC?" }, { "from": "gpt", "value": "The answer is 250 degrees" } ], "clip_score": 30.265625, "loss": 2.4576902389526367 }, { "id": "identity_101949", "image": "IconQA/images/11589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many marbles are in the top row?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 22.171875, "loss": 1.9973787069320679 }, { "id": "identity_181838", "image": "CLEVR-Math/images/CLEVR_train_007702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple objects are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.734375, "loss": 2.3678064346313477 }, { "id": "identity_144300", "image": "DocVQA/images/zkvm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the application addressed to?" }, { "from": "gpt", "value": "The answer is Mrs. J. Heather Etheridge" } ], "clip_score": 26.328125, "loss": 3.0780553817749023 }, { "id": "identity_33002", "image": "VQA2.0/images/470762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the street empty?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 21.453125, "loss": 4.1035919189453125 }, { "id": "identity_202352", "image": "VQA2.0/images/166813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person in the center performing?" }, { "from": "gpt", "value": "The answer is Skateboarding" } ], "clip_score": 28.125, "loss": 2.047933578491211 }, { "id": "identity_68649", "image": "ScienceQA/images/18419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many legs does the table have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.203125, "loss": 4.380188465118408 }, { "id": "identity_39720", "image": "DocVQA/images/lggn0020_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the log date and time for the last update on the Camel Q2 Roll Your Own program?" }, { "from": "gpt", "value": "The answer is 10/15/99 12:17" } ], "clip_score": 20.71875, "loss": 1.755060076713562 }, { "id": "identity_191720", "image": "PMC-VQA/images/PMC8024328_Fig1_410813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many screws are visible in the spinal hardware?" }, { "from": "gpt", "value": "The answer is Seven screws" } ], "clip_score": 31.71875, "loss": 3.376789093017578 }, { "id": "identity_167600", "image": "IconQA/images/95132.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of white dots on all the marbles combined?" }, { "from": "gpt", "value": "The answer is 42" } ], "clip_score": 30.40625, "loss": 2.0814781188964844 }, { "id": "identity_152487", "image": "AI2D/images/246.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is the tertiary consumer in this food chain?" }, { "from": "gpt", "value": "The answer is Either the organism labeled as 'Carnivore' next to the bird or the bird itself" } ], "clip_score": 29.25, "loss": 2.8178153038024902 }, { "id": "identity_87592", "image": "GeoQA+/images/3766.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the arc CD in the given circle if the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is (5 * π) units" } ], "clip_score": 32.46875, "loss": 2.805610418319702 }, { "id": "identity_51300", "image": "DocVQA/images/mxdw0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is The Nutrition Foundation A Perspective" } ], "clip_score": 37.4375, "loss": 1.675635576248169 }, { "id": "identity_182084", "image": "A-OKVQA/images/291140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the beach houses' roofs?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 28.1875, "loss": 3.461587905883789 }, { "id": "identity_34585", "image": "IconQA/images/52200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There are 5 rubber ducks in the top ten frame. How many rubber ducks are in the bottom ten frame?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.15625, "loss": 2.150639772415161 }, { "id": "identity_218166", "image": "VQA-AS/images/19106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bicycle?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 21.28125, "loss": 3.293316602706909 }, { "id": "identity_303875", "image": "Geometry3K/images/1726.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, m \\angle 1 = 53. Determine the measure of \\angle 2.\nChoices:\n(A) 97\n(B) 107\n(C) 117\n(D) 127" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.21875, "loss": 2.1902992725372314 }, { "id": "identity_70640", "image": "ScienceQA/images/6594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years are between Adolf Hitler becoming Chancellor of Germany and the United States entering World War II?" }, { "from": "gpt", "value": "The answer is 8 years" } ], "clip_score": 34.46875, "loss": 1.641790747642517 }, { "id": "identity_200148", "image": "VQA2.0/images/535164.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any structures on top of the hill?" }, { "from": "gpt", "value": "The answer is Yes, there are gazebos" } ], "clip_score": 25.046875, "loss": 2.572432279586792 }, { "id": "identity_247677", "image": "PMC-VQA/images/PMC8374203_fig0002_37186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the visible characteristics in the image, what type of growth pattern is exhibited by the fungi, and what does this indicate about the environmental conditions?" }, { "from": "gpt", "value": "The answer is Uniform nutrient availability and lack of competition" } ], "clip_score": 18.671875, "loss": 3.4353857040405273 }, { "id": "identity_292657", "image": "A-OKVQA/images/199136.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal is the man on the horse looking at?\nChoices:\n(A) bull\n(B) boar\n(C) bear\n(D) panther" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.890625, "loss": 4.300055980682373 }, { "id": "identity_318880", "image": "A-OKVQA/images/459997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle is this?\nChoices:\n(A) tour bus\n(B) shuttle bus\n(C) double decker\n(D) public transportation" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.203125, "loss": 4.252484321594238 }, { "id": "identity_85284", "image": "UniGeo/images/1521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC with DE parallel to BC, if AD:DB = 2:3 and the perimeter of triangle ABC is 24 units, what is the length of side AB?" }, { "from": "gpt", "value": "The answer is 10 units" } ], "clip_score": 35.15625, "loss": 2.0399911403656006 }, { "id": "identity_158527", "image": "ScienceQA/images/2323.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What ocean is depicted at the bottom of the image?" }, { "from": "gpt", "value": "The answer is Southern Ocean" } ], "clip_score": 25.9375, "loss": 2.5478909015655518 }, { "id": "identity_154340", "image": "TQA/images/question_images/cycle_nitrogen_6716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of denitrifying prokaryotes in the nitrogen cycle according to the image?" }, { "from": "gpt", "value": "The answer is Convert nitrates to atmospheric nitrogen (N2)" } ], "clip_score": 36.78125, "loss": 1.76093327999115 }, { "id": "identity_210887", "image": "A-OKVQA/images/463306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the skateboarder above or below the rim of the bowl?" }, { "from": "gpt", "value": "The answer is Above" } ], "clip_score": 30.6875, "loss": 2.8385884761810303 }, { "id": "identity_304220", "image": "A-OKVQA/images/450592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are these white horses used for?\nChoices:\n(A) pulling carriage\n(B) breeding\n(C) racing\n(D) tilling land" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.75, "loss": 4.068846702575684 }, { "id": "identity_218957", "image": "TextVQA/images/012929dbb05a8ddf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What period of the game is it currently?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 16.828125, "loss": 2.319920778274536 }, { "id": "identity_30118", "image": "Super-CLEVR/images/superCLEVR_new_001203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brown wagons are right of the large metallic thing to the right of the small rubber airliner?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 29.84375, "loss": 2.8548853397369385 }, { "id": "identity_199583", "image": "PMC-VQA/images/PMC9188905_Fig1_309252.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any abnormalities in the lung fields?" }, { "from": "gpt", "value": "The answer is No visible abnormalities in the lung fields" } ], "clip_score": 28.53125, "loss": 2.083643913269043 }, { "id": "identity_204768", "image": "A-OKVQA/images/263042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible legs does the elephant have?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.703125, "loss": 4.552616119384766 }, { "id": "identity_314145", "image": "Geometry3K/images/1118.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 4.2\n(B) 4.65\n(C) 5.6\n(D) 8.4" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.5, "loss": 2.8338370323181152 }, { "id": "identity_87942", "image": "UniGeo/images/501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,若圆锥的底面圆半径AC为6cm,高PC为8cm,求圆锥的体积" }, { "from": "gpt", "value": "The answer is 96π cm^3" } ], "clip_score": 24.375, "loss": 2.0309982299804688 }, { "id": "identity_22964", "image": "TQA/images/abc_question_images/human_system_digestive_16085.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label shows the liver?\nChoices:\n(A) T\n(B) W\n(C) C\n(D) P" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.53125, "loss": 4.4982500076293945 }, { "id": "identity_151087", "image": "ScienceQA/images/17442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance between the magnets in Pair 2?" }, { "from": "gpt", "value": "The answer is 21.1 mm" } ], "clip_score": 21.8125, "loss": 2.2330634593963623 }, { "id": "identity_40331", "image": "FigureQA/images/41571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 28.4375, "loss": 3.8324172496795654 }, { "id": "identity_75177", "image": "AI2D/images/3285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure that stores water and waste in the cell?" }, { "from": "gpt", "value": "The answer is Vacuole" } ], "clip_score": 26.90625, "loss": 1.9031217098236084 }, { "id": "identity_125114", "image": "PMC-VQA/images/PMC7052476_fig0005_127377.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to obtain these images?" }, { "from": "gpt", "value": "The answer is X-ray imaging" } ], "clip_score": 30.96875, "loss": 1.7482104301452637 }, { "id": "identity_266391", "image": "PlotQA/images/104303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are most of the years between 2007 and 2008 (inclusive) characterized by a total debt service exceeding 6.8%?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.640625, "loss": 3.3879342079162598 }, { "id": "identity_211844", "image": "VQA2.0/images/381163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the speed limit on the road where the red bus is located?" }, { "from": "gpt", "value": "The answer is 50 km/h" } ], "clip_score": 21.21875, "loss": 1.4965397119522095 }, { "id": "identity_323156", "image": "MapQA/images/map_8415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this symbol in the legend represent the smallest category?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 14.2578125, "loss": 3.1025373935699463 }, { "id": "identity_329549", "image": "VizWiz/images/VizWiz_train_00006665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What do you see in this image?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 17.1875, "loss": 4.383904457092285 }, { "id": "identity_233922", "image": "Geometry3K/images/754.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the area of triangle EFG using the altitude drawn to the hypotenuse and verify it using the side lengths" }, { "from": "gpt", "value": "The answer is 72" } ], "clip_score": 36.46875, "loss": 3.0502140522003174 }, { "id": "identity_109652", "image": "TextVQA/images/a36f7a1a79fb312b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the price of the pink mixer on display?" }, { "from": "gpt", "value": "The answer is $299.99" } ], "clip_score": 35.84375, "loss": 1.5435206890106201 }, { "id": "identity_156038", "image": "ScienceQA/images/10003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which ocean is situated to the east of the African continent?" }, { "from": "gpt", "value": "The answer is Indian Ocean" } ], "clip_score": 29.640625, "loss": 2.6024394035339355 }, { "id": "identity_169080", "image": "IconQA/images/73288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more diamonds in the first row or the second row?" }, { "from": "gpt", "value": "The answer is The first row has more diamonds" } ], "clip_score": 29.96875, "loss": 2.1574082374572754 }, { "id": "identity_225341", "image": "FigureQA/images/76711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the maximum values of Maroon and Dark Orange, and at which x-axis value does this maximum difference occur?" }, { "from": "gpt", "value": "The answer is 5 at x-axis value 100" } ], "clip_score": 29.40625, "loss": 2.344839572906494 }, { "id": "identity_128455", "image": "Super-CLEVR/images/superCLEVR_new_000234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Gray" } ], "clip_score": 19.25, "loss": 4.79545259475708 }, { "id": "identity_298036", "image": "Geometry3K/images/36.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nOptions:\n(A) 120\n(B) 135\n(C) 145\n(D) 160" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 2.457690477371216 }, { "id": "identity_262018", "image": "TabMWP/images/27700.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among Lorenzo, Nellie, Whitney, and Edward, who collected the highest number of newspapers for the recycling project in Lorenzo's class?\nChoices:\n(A) Lorenzo\n(B) Nellie\n(C) Whitney\n(D) Edward" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.84375, "loss": 3.1015284061431885 }, { "id": "identity_311336", "image": "VizWiz/images/VizWiz_train_00008371.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on this shirt?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 25.828125, "loss": 4.4053544998168945 }, { "id": "identity_49168", "image": "DocVQA/images/xhvg0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the retail value of shipments received in dollars?" }, { "from": "gpt", "value": "The answer is $23,380,968" } ], "clip_score": 23.28125, "loss": 2.2667386531829834 }, { "id": "identity_49727", "image": "DocVQA/images/fzfm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What city and state is the telegram being sent to?" }, { "from": "gpt", "value": "The answer is ZAMALEK, CAIRO, EGYPT U.A.R" } ], "clip_score": 29.15625, "loss": 2.257692813873291 }, { "id": "identity_55849", "image": "MapQA/images/map_11039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any state with unavailable data on the percentage of flowers?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 33.8125, "loss": 2.7607035636901855 }, { "id": "identity_314530", "image": "Super-CLEVR/images/superCLEVR_new_001418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the other small object that is the same shape as the yellow object?" }, { "from": "gpt", "value": "The answer is cyan" } ], "clip_score": 19.640625, "loss": 3.781097650527954 }, { "id": "identity_206410", "image": "PMC-VQA/images/PMC6430261_Fig5_455714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of this cross-sectional image?" }, { "from": "gpt", "value": "The answer is Horizontal or axial" } ], "clip_score": 21.390625, "loss": 3.4045190811157227 }, { "id": "identity_257934", "image": "PMC-VQA/images/PMC5645507_F1_228230.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific medical condition could be assessed using the type of maps shown in the image, and what are the key indicators in the maps that suggest this condition?" }, { "from": "gpt", "value": "The answer is Ischemic stroke, indicated by red areas showing delayed perfusion" } ], "clip_score": 31.5625, "loss": 3.7067506313323975 }, { "id": "identity_332846", "image": "VQA2.0/images/180641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is over this animal's mouth?" }, { "from": "gpt", "value": "The answer is muzzle" } ], "clip_score": 16.71875, "loss": 1.9180831909179688 }, { "id": "identity_225023", "image": "FigureQA/images/46212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much is Dim Gray greater than Chartreuse?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 27.296875, "loss": 1.9428250789642334 }, { "id": "identity_205661", "image": "VQA2.0/images/295428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many hanging garlic strings are there in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 14.9765625, "loss": 2.440558433532715 }, { "id": "identity_275577", "image": "UniGeo/images/20.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, points A, B, and C are all on circle O, with ∠ACB measuring 35°. What is the degree measure of ∠AOB?\nChoices:\n(A) 20°\n(B) 40°\n(C) 60°\n(D) 70°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.03125, "loss": 2.8371682167053223 }, { "id": "identity_264387", "image": "GeoQA+/images/2585.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, quadrilateral ABCD is inscribed in circle O. If ∠B = 130°, what is the measure of ∠AOC?\nChoices:\n(A) 130°\n(B) 120°\n(C) 110°\n(D) 100°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.6875, "loss": 2.505145788192749 }, { "id": "identity_118311", "image": "PMC-VQA/images/PMC7416986_FIG4_232070.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any markers or annotations on the image besides the arrows?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.875, "loss": 3.0741653442382812 }, { "id": "identity_95322", "image": "TabMWP/images/2903.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a banana cream pie cost than a chocolate cheesecake?" }, { "from": "gpt", "value": "The answer is $1.57" } ], "clip_score": 34.40625, "loss": 2.0042924880981445 }, { "id": "identity_88227", "image": "UniGeo/images/2487.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle O, if ∠AOC = 70°, what is the measure of ∠OBC?" }, { "from": "gpt", "value": "The answer is 55°" } ], "clip_score": 36.71875, "loss": 2.1171939373016357 }, { "id": "identity_326341", "image": "PMC-VQA/images/PMC7600865_medicina-56-00534-f002_287456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the lesion located in the initial pelvic axial CT scan?\nChoices:\n(A) Left ilium near the SI joint\n(B) Right ilium near the sacrum and coccyx\n(C) Right ilium near the SI joint\n(D) Left ilium near the sacrum and coccyx" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.53125, "loss": 4.184946537017822 }, { "id": "identity_158190", "image": "TQA/images/question_images/photosynthesis_7481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two main products of photosynthesis as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Oxygen and Food" } ], "clip_score": 33.5625, "loss": 2.134450912475586 }, { "id": "identity_249339", "image": "A-OKVQA/images/390539.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the cop's uniform shares the most functional similarity with the motorcycle's handlebars, considering the cop's role and the motorcycle's design?" }, { "from": "gpt", "value": "The answer is gloves" } ], "clip_score": 32.625, "loss": 3.200408935546875 }, { "id": "identity_95187", "image": "CLEVR-Math/images/CLEVR_train_018584.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are neither cylinders nor blue in color?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.859375, "loss": 2.443145990371704 }, { "id": "identity_131239", "image": "VizWiz/images/VizWiz_train_00009523.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is the dog sitting on?" }, { "from": "gpt", "value": "The answer is Wooden" } ], "clip_score": 26.0625, "loss": 3.157114028930664 }, { "id": "identity_292629", "image": "PlotQA/images/54720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the interval between two successive major ticks on the X-axis?" }, { "from": "gpt", "value": "The answer is 0.5" } ], "clip_score": 13.71875, "loss": 2.1650726795196533 }, { "id": "identity_220282", "image": "VQA-AS/images/5091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plants are visible in the room?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.390625, "loss": 4.611902713775635 }, { "id": "identity_90443", "image": "GeoQA+/images/4477.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line AB is parallel to line CD and AD is perpendicular to BC at point E, and the measure of angle ABE is 32 degrees, what is the measure of angle BCD?" }, { "from": "gpt", "value": "The answer is 148 degrees" } ], "clip_score": 31.859375, "loss": 1.52910315990448 }, { "id": "identity_205653", "image": "PMC-VQA/images/PMC9175008_F4_304069.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is used to produce this image?" }, { "from": "gpt", "value": "The answer is CT angiography" } ], "clip_score": 26.703125, "loss": 3.2403345108032227 }, { "id": "identity_324148", "image": "VQA2.0/images/187559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the basket that the animals are eating from?" }, { "from": "gpt", "value": "The answer is cone" } ], "clip_score": 19.328125, "loss": 4.229569435119629 }, { "id": "identity_209128", "image": "PMC-VQA/images/PMC6878647_Fig2_74203.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a central point where all the lines in the optical coherence tomography image converge?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.125, "loss": 2.952069044113159 }, { "id": "identity_151770", "image": "ScienceQA/images/1285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the initial concentration of solute particles on the right side of the membrane compared to the left side?" }, { "from": "gpt", "value": "The answer is Higher" } ], "clip_score": 30.078125, "loss": 3.44399356842041 }, { "id": "identity_266740", "image": "MapQA/images/map_6279.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any missing data on the map?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.703125, "loss": 2.855461597442627 }, { "id": "identity_118627", "image": "VQA2.0/images/567234.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction does the \"ONE WAY\" sign indicate traffic should move?" }, { "from": "gpt", "value": "The answer is To the right" } ], "clip_score": 26.921875, "loss": 3.1949234008789062 }, { "id": "identity_200579", "image": "PMC-VQA/images/PMC6543836_F2_487871.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage zoom level used for both images?" }, { "from": "gpt", "value": "The answer is 166%" } ], "clip_score": 20.671875, "loss": 2.616269111633301 }, { "id": "identity_289342", "image": "FigureQA/images/88908.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly states whether Forest Green is the smoothest?\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.03125, "loss": 3.960901975631714 }, { "id": "identity_236179", "image": "Super-CLEVR/images/superCLEVR_new_001785.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image are either big or purple, and how many of those are metallic?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 27.21875, "loss": 3.474203109741211 }, { "id": "identity_297079", "image": "Super-CLEVR/images/superCLEVR_new_001505.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many small yellow matte objects are located to the left of the yellow rubber bicycle and to the right of the yellow matte regular bus?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 23.703125, "loss": 2.5552027225494385 }, { "id": "identity_29578", "image": "PMC-VQA/images/PMC7160939_Fig2_152756.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of injury is indicated by the image? \nChoices:\n(A) Fracture \n(B) Ligament tear \n(C) Muscle strain \n(D) None of the above. " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.15625, "loss": 4.403656959533691 }, { "id": "identity_152093", "image": "ScienceQA/images/2836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which body of water is to the east of the highlighted state?" }, { "from": "gpt", "value": "The answer is Atlantic Ocean" } ], "clip_score": 24.71875, "loss": 2.7477834224700928 }, { "id": "identity_236096", "image": "DocVQA/images/ymkm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the recipient of the letter, including the committee name?" }, { "from": "gpt", "value": "The answer is E. V. McCollum Commemorative Committee, 8916 Walden Road, Silver Spring, Maryland 20901" } ], "clip_score": 28.109375, "loss": 2.197359085083008 }, { "id": "identity_300593", "image": "Super-CLEVR/images/superCLEVR_new_000426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many small green shiny things are behind the green shiny object to the left of the large red metal aeroplane?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 25.625, "loss": 2.56522798538208 }, { "id": "identity_210350", "image": "VQA2.0/images/232546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the lamp post?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 20.671875, "loss": 3.414583921432495 }, { "id": "identity_291735", "image": "A-OKVQA/images/470984.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which method is the most efficient for returning to the top of the hill? \nOptions:\n(A) running\n(B) walking\n(C) ski lift\n(D) drive" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.921875, "loss": 4.684670925140381 }, { "id": "identity_33874", "image": "TextVQA/images/191919a609677906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: where can i buy these fireworks?" }, { "from": "gpt", "value": "The answer is standard fireworks" } ], "clip_score": 23.984375, "loss": 1.8612300157546997 }, { "id": "identity_221632", "image": "TextVQA/images/e12cac7f4cd927a5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing a cap in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 20.59375, "loss": 4.921142101287842 }, { "id": "identity_320089", "image": "IconQA/images/95966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a dog. Which image is fourth?\nChoices:\n(A) house\n(B) paw\n(C) dog" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.1875, "loss": 4.547595500946045 }, { "id": "identity_252601", "image": "A-OKVQA/images/72011.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two seasons that occur before and after the current season?" }, { "from": "gpt", "value": "The answer is summer and winter" } ], "clip_score": 18.53125, "loss": 2.560343027114868 }, { "id": "identity_135861", "image": "DocVQA/images/jrmp0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which faculty member's information can be found on page 9?" }, { "from": "gpt", "value": "The answer is Charles L. Cooney, Ph.D" } ], "clip_score": 20.3125, "loss": 1.9701154232025146 }, { "id": "identity_224718", "image": "AI2D/images/3242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the regions in the diagram that represent the cytoplasm and the organelles within it, excluding the nucleus" }, { "from": "gpt", "value": "The answer is Golgi apparatus, smooth ER, granular ER, mitochondria, lysosomes, ribosomes, centrioles" } ], "clip_score": 31.9375, "loss": 1.3833547830581665 }, { "id": "identity_21454", "image": "ChartQA/images/10771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average among all LGBT groups that are a lot accepted?" }, { "from": "gpt", "value": "The answer is 0.168" } ], "clip_score": 35.65625, "loss": 2.5808448791503906 }, { "id": "identity_96699", "image": "IconQA/images/36731.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dots are there in total on the dice shown in the image?" }, { "from": "gpt", "value": "The answer is 31" } ], "clip_score": 34.8125, "loss": 2.4625346660614014 }, { "id": "identity_324353", "image": "VQA-AS/images/6215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on this?" }, { "from": "gpt", "value": "The answer is plant" } ], "clip_score": 16.265625, "loss": 3.463181734085083 }, { "id": "identity_128818", "image": "TextVQA/images/7b2750bcff985a2c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the background in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 17.46875, "loss": 3.6233115196228027 }, { "id": "identity_246036", "image": "VQA2.0/images/174615.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many poles are shown, and how many trees are visible in the background?" }, { "from": "gpt", "value": "The answer is 3 poles and 4 trees" } ], "clip_score": 16.03125, "loss": 1.7886004447937012 }, { "id": "identity_164178", "image": "UniGeo/images/2711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BCD in the parallelogram ABCD?" }, { "from": "gpt", "value": "The answer is 35 degrees" } ], "clip_score": 35.8125, "loss": 2.2180697917938232 }, { "id": "identity_194174", "image": "A-OKVQA/images/483569.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus in the center of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 27.109375, "loss": 2.933478832244873 }, { "id": "identity_214232", "image": "PMC-VQA/images/PMC7201798_Fig1_164065.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What abnormality is indicated by the red arrow in the CT image?" }, { "from": "gpt", "value": "The answer is Lung nodule" } ], "clip_score": 36.5, "loss": 3.1698851585388184 }, { "id": "identity_3642", "image": "ScienceQA/images/2066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which property matches this object?\nChoices:\n(A) sour\n(B) sticky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.734375, "loss": 4.403824806213379 }, { "id": "identity_83413", "image": "UniGeo/images/2261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of parallelogram ABCD is 32, what is the height of the parallelogram when AB is the base?" }, { "from": "gpt", "value": "The answer is 16/7 or approximately 2.29" } ], "clip_score": 33.25, "loss": 1.884468674659729 }, { "id": "identity_301179", "image": "DVQA/images/bar_train_00112689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What category does this color represent?" }, { "from": "gpt", "value": "The answer is reason" } ], "clip_score": 23.84375, "loss": 3.648953914642334 }, { "id": "identity_230748", "image": "DVQA/images/bar_train_00180657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithms that have accuracies higher than 1?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 35.46875, "loss": 2.3246257305145264 }, { "id": "identity_53145", "image": "DocVQA/images/mpcw0217_23.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which paper was submitted to J Women's Health Gender-Based Med.?" }, { "from": "gpt", "value": "The answer is Ke (BB #3)" } ], "clip_score": 18.78125, "loss": 4.660710334777832 }, { "id": "identity_242646", "image": "A-OKVQA/images/400507.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Why is visibility important for the men wearing green vests in the context of the airport environment shown in the image?" }, { "from": "gpt", "value": "The answer is To ensure safety and efficient coordination in a busy airport" } ], "clip_score": 22.75, "loss": 3.25817608833313 }, { "id": "identity_106451", "image": "VQA2.0/images/576780.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vegetables visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.515625, "loss": 3.2906081676483154 }, { "id": "identity_201638", "image": "PMC-VQA/images/PMC7966795_Fig5_393286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is used to indicate the left breast in the CT scans?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 23.84375, "loss": 4.671329021453857 }, { "id": "identity_251749", "image": "A-OKVQA/images/400592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many electronic devices are visible on the messy desk, and what are they?" }, { "from": "gpt", "value": "The answer is five" } ], "clip_score": 28.84375, "loss": 5.471255302429199 }, { "id": "identity_47549", "image": "PlotQA/images/99007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Latvia have the lowest percentage of value added in manufacturing by textiles and clothing?" }, { "from": "gpt", "value": "The answer is 1998" } ], "clip_score": 36.375, "loss": 1.8277125358581543 }, { "id": "identity_295459", "image": "A-OKVQA/images/538775.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which decorative element is located in the center of the clock face?\nChoices:\n(A) circle\n(B) square\n(C) moon\n(D) sun" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.8828125, "loss": 3.594005584716797 }, { "id": "identity_132408", "image": "PMC-VQA/images/PMC2734940_fig2_44592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the cyst in the MRI image appear to have well-defined or blurred edges?" }, { "from": "gpt", "value": "The answer is Well-defined edges" } ], "clip_score": 32.8125, "loss": 2.6506946086883545 }, { "id": "identity_202832", "image": "TextVQA/images/0222dde37ac1175c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the text written on the jersey second from the right?" }, { "from": "gpt", "value": "The answer is Philadelphia" } ], "clip_score": 22.0625, "loss": 4.9169793128967285 }, { "id": "identity_336025", "image": "A-OKVQA/images/554040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the green stuff in this bottle most likely?\nChoices:\n(A) soap\n(B) chalk\n(C) jam\n(D) jelly" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 13.0625, "loss": 4.506597995758057 }, { "id": "identity_206987", "image": "A-OKVQA/images/567683.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the motorcycle in the center of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 23.546875, "loss": 2.556920289993286 }, { "id": "identity_82103", "image": "Geometry3K/images/707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle KJH?" }, { "from": "gpt", "value": "The answer is 156 degrees" } ], "clip_score": 33.84375, "loss": 2.3773515224456787 }, { "id": "identity_110341", "image": "TextVQA/images/043a59888f5bcb7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of products are placed on the top shelf of the wooden rack?" }, { "from": "gpt", "value": "The answer is Cleaning sprays" } ], "clip_score": 30.96875, "loss": 2.650481939315796 }, { "id": "identity_252876", "image": "TabMWP/images/6642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which activities have a duration of more than one hour and end before 4:00 P.M.?" }, { "from": "gpt", "value": "The answer is yoga and volleyball" } ], "clip_score": 30.859375, "loss": 2.0212509632110596 }, { "id": "identity_190404", "image": "A-OKVQA/images/361009.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a phone number visible on the food truck?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.796875, "loss": 3.141205310821533 }, { "id": "identity_22142", "image": "TQA/images/question_images/seasons_6289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many seasons of the earth are shown?\nChoices:\n(A) 3\n(B) 4\n(C) 2\n(D) 1" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.15625, "loss": 3.408773422241211 }, { "id": "identity_104044", "image": "PMC-VQA/images/PMC4236490_Fig2_337300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the medical term for the type of abnormality shown in the image?" }, { "from": "gpt", "value": "The answer is Lesion" } ], "clip_score": 26.234375, "loss": 3.2725746631622314 }, { "id": "identity_31548", "image": "PMC-VQA/images/PMC4801854_F2_13423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image showing?\nChoices:\n(A) Bacterial colony on agar plate \n(B) Mimivirus viral factory in Acanthamoeba polyphaga \n(C) Fungal hyphae under microscope \n(D) E. coli morphology under electron microscope " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.375, "loss": 4.1403937339782715 }, { "id": "identity_95818", "image": "TabMWP/images/25654.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest price at which the quantity demanded exceeds the quantity supplied?" }, { "from": "gpt", "value": "The answer is $40" } ], "clip_score": 32.65625, "loss": 2.3905673027038574 }, { "id": "identity_68014", "image": "TQA/images/abc_question_images/photosynthesis_14130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the letter J indicate in this diagram?" }, { "from": "gpt", "value": "The answer is J" } ], "clip_score": 27.390625, "loss": 3.1848888397216797 }, { "id": "identity_251324", "image": "ScienceQA/images/20106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the magnetic force between the magnets in Pair 1 and Pair 2 if the magnetic field strength of each magnet is 0.5 Tesla and the distance between the magnets is 43.2 mm. Assume the magnetic force \\( F \\) between two magnets is given by the formula \\( F = \\frac{{\\mu_0 \\cdot m_1 \\cdot m_2}}{{4\\pi \\cdot d^2}} \\), where \\( \\mu_0 \\) is the permeability of free space (\\( 4\\pi \\times 10^{-7} \\, \\text{T}\\cdot\\text{m/A} \\)), \\( m_1 \\) and \\( m_2 \\) are the magnetic moments of the magnets, and \\( d \\) is the distance between the magnets" }, { "from": "gpt", "value": "The answer is \\( 5.36 \\times 10^{-5} \\, \\text{N} \\)" } ], "clip_score": 15.71875, "loss": 1.226691722869873 }, { "id": "identity_86369", "image": "UniGeo/images/2548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ABCD is a rectangle and DE is parallel to AC, what is the length of segment BE if BC = 8 units?" }, { "from": "gpt", "value": "The answer is 8 units" } ], "clip_score": 32.75, "loss": 2.3350794315338135 }, { "id": "identity_295969", "image": "DocVQA/images/tsnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the title of the document?" }, { "from": "gpt", "value": "The answer is Curriculum Vitae" } ], "clip_score": 19.71875, "loss": 1.6880543231964111 }, { "id": "identity_288047", "image": "CLEVR-Math/images/CLEVR_train_065793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 3 blue blocks, how many blue blocks remain?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.125, "loss": 2.5377891063690186 }, { "id": "identity_28432", "image": "IconQA/images/21235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many marbles are there?\nChoices:\n(A) 4\n(B) 1\n(C) 5\n(D) 3\n(E) 2" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.265625, "loss": 3.3765251636505127 }, { "id": "identity_161703", "image": "GeoQA+/images/3169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB为⊙O的直径,CD是⊙O的弦,∠ADC=35°,求三角形ADC的面积,已知CD=6cm" }, { "from": "gpt", "value": "The answer is 6√3 cm²" } ], "clip_score": 31.265625, "loss": 1.634182333946228 }, { "id": "identity_256128", "image": "VQA2.0/images/412986.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key factors that indicate he is maintaining balance while skateboarding down the rail?" }, { "from": "gpt", "value": "The answer is Body position, arm movement, foot placement, rail contact, surroundings" } ], "clip_score": 30.171875, "loss": 3.369658946990967 }, { "id": "identity_19384", "image": "Super-CLEVR/images/superCLEVR_new_000396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number of metallic cars have the same size as the metal aeroplane?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 26.1875, "loss": 2.332132577896118 }, { "id": "identity_82336", "image": "GeoQA+/images/1752.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle OAB in the given circle?" }, { "from": "gpt", "value": "The answer is 100 degrees" } ], "clip_score": 34.5625, "loss": 2.0040292739868164 }, { "id": "identity_316920", "image": "GeoQA+/images/3071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,G是边BC上的一点,D、E、F分别是AG、BD、CE的中点,S△ABC=48,则S△DEF的值为()\nChoices:\n(A) 4.8\n(B) 6\n(C) 8\n(D) 12" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 2.6414647102355957 }, { "id": "identity_333415", "image": "VQA2.0/images/38850.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are not wearing shirts?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 13.4375, "loss": 2.516209840774536 }, { "id": "identity_211477", "image": "VQA2.0/images/19002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest?" }, { "from": "gpt", "value": "The answer is Noon or early afternoon" } ], "clip_score": 18.5625, "loss": 3.182856559753418 }, { "id": "identity_12308", "image": "UniGeo/images/2618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,平行四边形ABCD中,AB=6,BC=8,AC的垂直平分线交AD于E,则三角形CDE的周长是()\nChoices:\n(A) 6\n(B) 8\n(C) 14\n(D) 16" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.5, "loss": 2.44958233833313 }, { "id": "identity_328806", "image": "PMC-VQA/images/PMC8035965_FIG2_413436.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the body was scanned in this CT? \nChoices:\n(A) Head \n(B) Neck \n(C) Abdomen \n(D) Legs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.25, "loss": 4.15398645401001 }, { "id": "identity_183815", "image": "VQA2.0/images/382104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the image?" }, { "from": "gpt", "value": "The answer is Overcast or cloudy" } ], "clip_score": 14.1171875, "loss": 3.0437607765197754 }, { "id": "identity_122845", "image": "PMC-VQA/images/PMC6909124_acm212773-fig-0001_82999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many crosshair markers are present within the green contour?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 19.984375, "loss": 4.587619304656982 }, { "id": "identity_31336", "image": "PMC-VQA/images/PMC5923268_Fig3_307137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the subject of the photograph in (B)? \nChoices:\n(A) Muscle tissue \n(B) Nerve tissue \n(C) Skin tissue \n(D) Bone tissue " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.25, "loss": 4.767789363861084 }, { "id": "identity_36607", "image": "ScienceQA/images/11273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Washington?\nChoices:\n(A) Nashville\n(B) Richmond\n(C) Frankfort\n(D) Olympia" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 19.84375, "loss": 4.041341304779053 }, { "id": "identity_211672", "image": "A-OKVQA/images/331289.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the traffic light for the vehicles moving towards the camera?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 28.953125, "loss": 2.754316568374634 }, { "id": "identity_93478", "image": "TabMWP/images/10545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of rotten tomatoes per barrel in the 'Stem' column?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.84375, "loss": 2.6574020385742188 }, { "id": "identity_96589", "image": "IconQA/images/23208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents a piece of wood, how many pieces of the same length can be cut from a 1-meter long wood?" }, { "from": "gpt", "value": "The answer is 14 pieces" } ], "clip_score": 36.0625, "loss": 2.3092710971832275 }, { "id": "identity_210025", "image": "A-OKVQA/images/293233.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the jersey of the player batting?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 21.1875, "loss": 1.8323414325714111 }, { "id": "identity_138010", "image": "DVQA/images/bar_train_00093966.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object has the highest preference in the shock category?" }, { "from": "gpt", "value": "The answer is father" } ], "clip_score": 26.578125, "loss": 3.681095838546753 }, { "id": "identity_123059", "image": "PMC-VQA/images/PMC4058497_fig1_298383.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is shown in this picture?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 29.46875, "loss": 2.945018768310547 }, { "id": "identity_205056", "image": "PMC-VQA/images/PMC7541359_Fig2_267731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging is shown in the picture?" }, { "from": "gpt", "value": "The answer is MRI scan" } ], "clip_score": 30.484375, "loss": 2.7429237365722656 }, { "id": "identity_156844", "image": "ScienceQA/images/943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the coin in the image in its original condition?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.4375, "loss": 2.8507907390594482 }, { "id": "identity_106426", "image": "TextVQA/images/aac56de71155f98e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date displayed on the screen?" }, { "from": "gpt", "value": "The answer is Thursday, September 19" } ], "clip_score": 23.21875, "loss": 1.580429196357727 }, { "id": "identity_191937", "image": "PMC-VQA/images/PMC4988790_f0035_63773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we determine the orientation of the brain slice in the image?" }, { "from": "gpt", "value": "The answer is Yes, it is a transverse or axial slice" } ], "clip_score": 31.296875, "loss": 2.5890676975250244 }, { "id": "identity_125877", "image": "VQA-AS/images/465.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the lighting suggest?" }, { "from": "gpt", "value": "The answer is Afternoon" } ], "clip_score": 19.296875, "loss": 2.942784547805786 }, { "id": "identity_271453", "image": "FigureQA/images/17828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick not the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.96875, "loss": 3.748631238937378 }, { "id": "identity_115468", "image": "A-OKVQA/images/47419.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of headwear is the person on the vehicle wearing?" }, { "from": "gpt", "value": "The answer is A hat" } ], "clip_score": 19.34375, "loss": 3.8006832599639893 }, { "id": "identity_230751", "image": "VQA-AS/images/2175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the boy doing with the shovel in his hand?" }, { "from": "gpt", "value": "The answer is Playing in the sand" } ], "clip_score": 26.265625, "loss": 2.75948429107666 }, { "id": "identity_148694", "image": "DocVQA/images/rgjm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is FOOD AND NUTRITION SCIENCE VS. THE HOLISTIC CONCEPT" } ], "clip_score": 26.640625, "loss": 1.591187834739685 }, { "id": "identity_90970", "image": "UniGeo/images/213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, if line DF is parallel to line AE, and angle B is 50 degrees, what is the measure of angle A?" }, { "from": "gpt", "value": "The answer is 25 degrees" } ], "clip_score": 30.3125, "loss": 2.055898427963257 }, { "id": "identity_187387", "image": "A-OKVQA/images/240950.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles are visible on the table?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 13.140625, "loss": 4.621762275695801 }, { "id": "identity_100978", "image": "CLEVR-Math/images/CLEVR_train_039012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in total?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 22.640625, "loss": 1.8434648513793945 }, { "id": "identity_223339", "image": "VizWiz/images/VizWiz_train_00011989.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What shape are the crackers?" }, { "from": "gpt", "value": "The answer is Square" } ], "clip_score": 25.6875, "loss": 3.8500168323516846 }, { "id": "identity_94330", "image": "TabMWP/images/11235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can Austin afford to buy three packages of crayons with his money?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.859375, "loss": 2.627155303955078 }, { "id": "identity_292480", "image": "DocVQA/images/gkvh0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Chief of Cardiology at Yale University?" }, { "from": "gpt", "value": "The answer is Lawrence S. Cohen, M.D." } ], "clip_score": 20.4375, "loss": 1.8354066610336304 }, { "id": "identity_168190", "image": "IconQA/images/19415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each large star represents 5 points and the small star represents 1 point, what is the total number of points for all stars?" }, { "from": "gpt", "value": "The answer is 51 points" } ], "clip_score": 31.265625, "loss": 2.4458281993865967 }, { "id": "identity_252214", "image": "VQA2.0/images/77858.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many major kitchen appliances are visible in the image, and what are they?" }, { "from": "gpt", "value": "The answer is Two; fridge and stove/oven" } ], "clip_score": 29.46875, "loss": 3.218717098236084 }, { "id": "identity_97010", "image": "IconQA/images/69383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers are in the second row?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.078125, "loss": 2.125678777694702 }, { "id": "identity_267566", "image": "DVQA/images/bar_train_00165755.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars solid colors without any patterns on them?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.1875, "loss": 3.6107330322265625 }, { "id": "identity_323670", "image": "GeoQA+/images/827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,四边形ABCD中缺失∠C,经测量得∠A=110°,∠D=75°,∠1=45°,则这个四边形缺失前的∠C的度数为()\n\nChoices:\n(A) 75°\n(B) 60°\n(C) 45°\n(D) 40°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.984375, "loss": 2.4626195430755615 }, { "id": "identity_23486", "image": "VQA2.0/images/194159.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are those vehicles floating?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.0078125, "loss": 3.6744308471679688 }, { "id": "identity_201756", "image": "PMC-VQA/images/PMC5033523_pone.0163637.g003_76298.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue is predominantly seen in the image?" }, { "from": "gpt", "value": "The answer is Connective tissue" } ], "clip_score": 28.59375, "loss": 3.0205507278442383 }, { "id": "identity_246505", "image": "PMC-VQA/images/PMC6441068_Fig16_459238.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing radiological features of Crohn's Disease as seen in the image, and how do they differ from the features of Ulcerative Colitis, Diverticulitis, and Irritable Bowel Syndrome?" }, { "from": "gpt", "value": "The answer is Transmural inflammation, skip lesions, fistulas, cobblestone appearance" } ], "clip_score": 28.390625, "loss": 2.3657033443450928 }, { "id": "identity_130771", "image": "VQA2.0/images/275511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cat sitting on the toilet?" }, { "from": "gpt", "value": "The answer is Brown and black" } ], "clip_score": 28.15625, "loss": 3.5396862030029297 }, { "id": "identity_245176", "image": "A-OKVQA/images/196104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total players are on the field during a baseball game, including both teams?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 25.765625, "loss": 1.787988543510437 }, { "id": "identity_22154", "image": "A-OKVQA/images/544732.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Passengers disembarking from the busses seen here might do what in the yellow canopied building?\nChoices:\n(A) catch rides\n(B) complain\n(C) dine\n(D) game" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.078125, "loss": 4.403393745422363 }, { "id": "identity_306590", "image": "FigureQA/images/31820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua the high value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.203125, "loss": 4.024133205413818 }, { "id": "identity_132773", "image": "Super-CLEVR/images/superCLEVR_new_000997.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels that are a different color than the vehicle body?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.03125, "loss": 3.0277020931243896 }, { "id": "identity_181646", "image": "CLEVR-Math/images/CLEVR_train_047816.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors are the cubes in the image?" }, { "from": "gpt", "value": "The answer is Green and Purple" } ], "clip_score": 31.375, "loss": 2.4141011238098145 }, { "id": "identity_244324", "image": "DocVQA/images/jnxd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author of the document and what is his professional title and affiliation?" }, { "from": "gpt", "value": "The answer is Robert E. Shank, Danforth Professor, Washington University School of Medicine" } ], "clip_score": 26.46875, "loss": 2.045971393585205 }, { "id": "identity_320085", "image": "PMC-VQA/images/PMC7856147_F7_358596.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of microscope was used to capture this image? \nChoices:\n(A) Transmission electron microscope \n(B) Scanning electron microscope \n(C) Optical microscope \n(D) Confocal microscope" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.484375, "loss": 3.844088315963745 }, { "id": "identity_26837", "image": "TabMWP/images/11781.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a train ticket to Baltimore cost than a train ticket to Chicago?" }, { "from": "gpt", "value": "The answer is 469" } ], "clip_score": 33.3125, "loss": 2.8872461318969727 }, { "id": "identity_270149", "image": "IconQA/images/22976.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability of selecting a black marble without looking?\nChoices:\n(A) probable\n(B) certain\n(C) unlikely\n(D) impossible" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.5, "loss": 3.97627592086792 }, { "id": "identity_170290", "image": "IconQA/images/25.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past 4 o'clock is it?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 29.828125, "loss": 1.8388068675994873 }, { "id": "identity_316102", "image": "IconQA/images/75045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Use the ruler to find the length of the line to the nearest centimeter. The line is approximately (_) centimeters long." }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.59375, "loss": 2.4511663913726807 }, { "id": "identity_313054", "image": "MapQA/images/map_12098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this place have the highest value in the USA?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.0, "loss": 2.6365342140197754 }, { "id": "identity_77604", "image": "ScienceQA/images/3548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does this image likely depict?" }, { "from": "gpt", "value": "The answer is Midday or early afternoon" } ], "clip_score": 14.796875, "loss": 2.801224946975708 }, { "id": "identity_161758", "image": "GeoQA+/images/5281.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side AC in the right triangle ABC?" }, { "from": "gpt", "value": "The answer is √15 or approximately 3.87" } ], "clip_score": 34.1875, "loss": 2.189988851547241 }, { "id": "identity_237646", "image": "TextVQA/images/a6228019c9381e58.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the three different types of signs shown in the image, and what do they collectively indicate about the road conditions ahead?" }, { "from": "gpt", "value": "The answer is continuous curves, no overtaking" } ], "clip_score": 25.640625, "loss": 3.3640761375427246 }, { "id": "identity_168467", "image": "IconQA/images/46719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents a piece of wood, how many pieces of 2 centimeters each can be cut from it?" }, { "from": "gpt", "value": "The answer is 5 pieces" } ], "clip_score": 35.25, "loss": 2.4108192920684814 }, { "id": "identity_236600", "image": "UniGeo/images/2907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, A、B、C三点都在⊙O上, 若∠C=34°, 则∠AOB的度数是()。已知∠AOC=2∠C, 求∠AOB的度数。" }, { "from": "gpt", "value": "The answer is 102°" } ], "clip_score": 33.6875, "loss": 1.949065089225769 }, { "id": "identity_195708", "image": "PMC-VQA/images/PMC5732013_FIG5_253432.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any abnormality or lesion in this brain image?" }, { "from": "gpt", "value": "The answer is No abnormality or lesion identified" } ], "clip_score": 30.90625, "loss": 2.48062801361084 }, { "id": "identity_102572", "image": "CLEVR-Math/images/CLEVR_train_055242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.484375, "loss": 2.4097931385040283 }, { "id": "identity_114099", "image": "TextVQA/images/3b2c66790d2429e8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the net volume of the drink in the bottle?" }, { "from": "gpt", "value": "The answer is 150 ml" } ], "clip_score": 24.796875, "loss": 1.4419355392456055 }, { "id": "identity_216315", "image": "VQA2.0/images/378293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the horse attached to?" }, { "from": "gpt", "value": "The answer is A cart or carriage" } ], "clip_score": 27.953125, "loss": 4.149530410766602 }, { "id": "identity_268950", "image": "TQA/images/abc_question_images/human_system_nervous_16177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the label that corresponds to the temporal lobe:\n(A) D\n(B) C\n(C) R\n(D) N" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.5, "loss": 4.049330711364746 }, { "id": "identity_23916", "image": "MapQA/images/map_10508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the map have missing data ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.828125, "loss": 3.0363142490386963 }, { "id": "identity_58695", "image": "MapQA/images/map_8651.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the number of youngsters in Florida greater than in New York?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 31.34375, "loss": 3.0647501945495605 }, { "id": "identity_24366", "image": "GeoQA+/images/2380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A、B、C是⊙O上的三个点,若∠C=28°,则∠OBA的度数为()\nChoices:\n(A) 28°\n(B) 56°\n(C) 57°\n(D) 62°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.15625, "loss": 2.6436805725097656 }, { "id": "identity_268334", "image": "DocVQA/images/qhxn0020_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide me with the Zip code?" }, { "from": "gpt", "value": "The answer is 02021-1484" } ], "clip_score": 19.140625, "loss": 2.310699701309204 }, { "id": "identity_188103", "image": "PMC-VQA/images/PMC6214163_Fig1_390456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any mass or tumor in the chest cavity?" }, { "from": "gpt", "value": "The answer is No visible mass or tumor" } ], "clip_score": 30.484375, "loss": 3.3715057373046875 }, { "id": "identity_239765", "image": "VQA2.0/images/139889.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many giraffes are on the field and what are they doing?" }, { "from": "gpt", "value": "The answer is two giraffes, one near the tree and one near the logs" } ], "clip_score": 30.828125, "loss": 2.034420967102051 }, { "id": "identity_54716", "image": "DVQA/images/bar_train_00091575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in preference percentage between 'horse' in the 'gas' category and 'mode' in the 'back' category?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 33.5625, "loss": 2.048194169998169 }, { "id": "identity_23462", "image": "TextVQA/images/9bfb77b253a7f005.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what years are listed on the green day ad?" }, { "from": "gpt", "value": "The answer is 1994-2014" } ], "clip_score": 22.890625, "loss": 1.1215856075286865 }, { "id": "identity_175295", "image": "TabMWP/images/20556.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many fireflies are there in 7 jars?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 32.09375, "loss": 1.8246663808822632 }, { "id": "identity_75711", "image": "AI2D/images/420.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of energy for the food web depicted in the diagram?" }, { "from": "gpt", "value": "The answer is Phytoplankton" } ], "clip_score": 33.40625, "loss": 1.8567308187484741 }, { "id": "identity_22682", "image": "ScienceQA/images/9210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country is highlighted?\nChoices:\n(A) Haiti\n(B) Dominica\n(C) Saint Vincent and the Grenadines\n(D) the Dominican Republic" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.875, "loss": 4.157762050628662 }, { "id": "identity_273451", "image": "VQA2.0/images/395396.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of individuals riding bicycles?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.375, "loss": 2.039077043533325 }, { "id": "identity_254865", "image": "TextVQA/images/030cc39bb84dd742.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full title and author of the book Ashlee is holding, and which university library is she in?" }, { "from": "gpt", "value": "The answer is The Complete Sherlock Holmes by Sir Arthur Conan Doyle, Indiana University East" } ], "clip_score": 32.625, "loss": 1.2365175485610962 }, { "id": "identity_169158", "image": "IconQA/images/12576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total duration of the three activities Levi did Tuesday morning?" }, { "from": "gpt", "value": "The answer is 3.5 hours" } ], "clip_score": 23.6875, "loss": 1.9097336530685425 }, { "id": "identity_256312", "image": "DocVQA/images/lxdw0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different speakers are scheduled to present during the scientific session on Friday from 8:00 am to 12:30 pm?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 24.921875, "loss": 2.4909074306488037 }, { "id": "identity_180845", "image": "TabMWP/images/13586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more people visited the mall 4 times than 5 times last month?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.671875, "loss": 1.9203256368637085 }, { "id": "identity_113890", "image": "A-OKVQA/images/14677.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the men engaged in?" }, { "from": "gpt", "value": "The answer is Cross-country skiing" } ], "clip_score": 30.6875, "loss": 2.156832456588745 }, { "id": "identity_134886", "image": "DVQA/images/bar_train_00177249.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items have a total sales of more than 20 units across all stores?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.59375, "loss": 2.3886706829071045 }, { "id": "identity_23922", "image": "ScienceQA/images/14657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Utah?\nChoices:\n(A) Provo\n(B) Salt Lake City\n(C) Sioux Falls\n(D) Boise" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.25, "loss": 4.24957275390625 }, { "id": "identity_154736", "image": "AI2D/images/1037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During which month does the Southern Hemisphere experience more hours of sunlight?" }, { "from": "gpt", "value": "The answer is December" } ], "clip_score": 31.609375, "loss": 2.9638683795928955 }, { "id": "identity_321886", "image": "IconQA/images/29964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Jen is dancing at a party this evening. The clock shows something. What is the time?\nChoices:\n(A) 6:00 P.M.\n(B) 6:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.328125, "loss": 3.631304979324341 }, { "id": "identity_177020", "image": "TabMWP/images/19186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many days did the inn host more than 5 guests?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.4375, "loss": 2.7579498291015625 }, { "id": "identity_256036", "image": "TabMWP/images/18480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Tristan need to buy a pair of tennis shoes, a pair of boots, and a black jacket?" }, { "from": "gpt", "value": "The answer is 92" } ], "clip_score": 37.8125, "loss": 3.2104949951171875 }, { "id": "identity_70733", "image": "ScienceQA/images/9156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What state borders the western side of Pennsylvania?" }, { "from": "gpt", "value": "The answer is Ohio" } ], "clip_score": 27.15625, "loss": 2.623887538909912 }, { "id": "identity_323480", "image": "VQA2.0/images/137547.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are there?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 16.453125, "loss": 1.906561255455017 }, { "id": "identity_3937", "image": "A-OKVQA/images/5282.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the boat going?\nChoices:\n(A) over bridge\n(B) home\n(C) under bridge\n(D) up river" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.6875, "loss": 3.9860751628875732 }, { "id": "identity_161840", "image": "UniGeo/images/803.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, if AB is the diameter of the semicircle and point D is the midpoint of arc AC, what is the ratio of the length of arc AD to the length of arc DC?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 34.71875, "loss": 1.1053595542907715 }, { "id": "identity_249007", "image": "UniGeo/images/3268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB∥CD,\\frac{AO}{OD}=\\frac{2}{3},若AB=6,CD=9,则△AOB与△DOC的面积比是()" }, { "from": "gpt", "value": "The answer is \\(\\frac{4}{9}\\)" } ], "clip_score": 30.5625, "loss": 2.34348201751709 }, { "id": "identity_290107", "image": "ChartQA/images/OECD_PRIMARY_ENERGY_SUPPLY_CHN_NAM_000004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value when adding the values of Namibia and China?" }, { "from": "gpt", "value": "The answer is 0.227" } ], "clip_score": 27.578125, "loss": 2.2793078422546387 }, { "id": "identity_287851", "image": "ScienceQA/images/7665.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do these magnets have an attractive or repulsive force between them?\nChoices:\n(A) attract\n(B) repel" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.609375, "loss": 3.8768982887268066 }, { "id": "identity_134421", "image": "DocVQA/images/gxyn0081_144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Total Mid-Price for New Group for March 1993?" }, { "from": "gpt", "value": "The answer is 16.8" } ], "clip_score": 26.421875, "loss": 2.193108558654785 }, { "id": "identity_71905", "image": "TQA/images/question_images/faults_189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term used to describe the block of land that lies above the fault plane?" }, { "from": "gpt", "value": "The answer is Hanging wall" } ], "clip_score": 37.15625, "loss": 3.8136954307556152 }, { "id": "identity_316074", "image": "AI2D/images/3599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the part of the alimentary canal that goes from the mouth and nasal cavities to the larynx, where it continues with the esophagus?\nChoices:\n(A) The pharynx.\n(B) The bronchus.\n(C) The trachea.\n(D) The diaphragm." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.21875, "loss": 3.0567848682403564 }, { "id": "identity_153455", "image": "ScienceQA/images/5784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which river forms the western border of the highlighted state?" }, { "from": "gpt", "value": "The answer is Mississippi River" } ], "clip_score": 28.734375, "loss": 2.85314679145813 }, { "id": "identity_245749", "image": "VQA2.0/images/370922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the image, and what types are they?" }, { "from": "gpt", "value": "The answer is six vehicles: one tow truck, three ATVs, one red vehicle, and one yellow vehicle" } ], "clip_score": 28.265625, "loss": 1.9860520362854004 }, { "id": "identity_303661", "image": "DVQA/images/bar_train_00178614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars?" }, { "from": "gpt", "value": "The answer is mood" } ], "clip_score": 23.71875, "loss": 3.562624931335449 }, { "id": "identity_89099", "image": "UniGeo/images/1969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point C is a point on line segment AB, point D is the midpoint of line segment BC, AB=10, and AC=6, what is the length of line segment CD?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.46875, "loss": 2.477168083190918 }, { "id": "identity_63644", "image": "ScienceQA/images/10341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the large island located to the northeast of the highlighted country?" }, { "from": "gpt", "value": "The answer is Puerto Rico" } ], "clip_score": 34.1875, "loss": 2.9511234760284424 }, { "id": "identity_182565", "image": "PMC-VQA/images/PMC8718535_Fig2_153886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the vertebral level shown in this image?" }, { "from": "gpt", "value": "The answer is L1" } ], "clip_score": 22.453125, "loss": 2.7640182971954346 }, { "id": "identity_178710", "image": "IconQA/images/77730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in length between the line and the 10 cm mark on the ruler?" }, { "from": "gpt", "value": "The answer is 3.5 cm" } ], "clip_score": 33.875, "loss": 1.8855822086334229 }, { "id": "identity_84097", "image": "Geometry3K/images/366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x if the perimeter of triangle ABE is 20?" }, { "from": "gpt", "value": "The answer is x = 7" } ], "clip_score": 32.28125, "loss": 1.9933044910430908 }, { "id": "identity_70365", "image": "AI2D/images/3203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is indicated at the point where the stem meets the roots?" }, { "from": "gpt", "value": "The answer is Interaction with symbiotic microbes" } ], "clip_score": 30.875, "loss": 2.142374277114868 }, { "id": "identity_40883", "image": "FigureQA/images/66722.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum value of Aqua?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 17.890625, "loss": 2.9244744777679443 }, { "id": "identity_29654", "image": "IconQA/images/35689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many triangles are there?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 29.9375, "loss": 2.262220621109009 }, { "id": "identity_218720", "image": "A-OKVQA/images/310240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of barrier separates the giraffes from the background?" }, { "from": "gpt", "value": "The answer is A metal fence" } ], "clip_score": 31.09375, "loss": 3.2591302394866943 }, { "id": "identity_13612", "image": "TabMWP/images/28348.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost for 2+1/2 pounds of wheat flakes?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 33.9375, "loss": 3.1660854816436768 }, { "id": "identity_77066", "image": "TQA/images/question_images/continental_drift_9079.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the names of the two supercontinents shown on the map?" }, { "from": "gpt", "value": "The answer is Laurasia and Gondwana" } ], "clip_score": 35.25, "loss": 1.5144580602645874 }, { "id": "identity_270498", "image": "IconQA/images/4296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vans present?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 27.09375, "loss": 1.7138627767562866 }, { "id": "identity_283047", "image": "CLEVR-Math/images/CLEVR_train_029938.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the blue metal blocks and all the big purple metallic spheres, how many objects remain?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 32.78125, "loss": 2.066091775894165 }, { "id": "identity_281200", "image": "VQA2.0/images/451767.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of umbrellas shown in the picture?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.453125, "loss": 2.084465503692627 }, { "id": "identity_107873", "image": "A-OKVQA/images/55868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of trees are planted along the pier?" }, { "from": "gpt", "value": "The answer is Palm trees" } ], "clip_score": 17.8125, "loss": 2.534440755844116 }, { "id": "identity_262752", "image": "PMC-VQA/images/PMC6825074_Fig6_59899.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, what does the red arrow indicate pointing towards?\nChoices:\n(A) Synovial thickening\n(B) Muscle tear\n(C) Nerve entrapment\n(D) Tendon rupture" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.765625, "loss": 5.038020610809326 }, { "id": "identity_265074", "image": "A-OKVQA/images/124739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event is expected to commence on September 29th in this location? Will it be:\n(A) marching practice\n(B) free parking\n(C) road work\n(D) candy giveaway" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.265625, "loss": 3.833749294281006 }, { "id": "identity_154615", "image": "ScienceQA/images/10202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any flowers visible among the Victoria amazonica leaves in the image?" }, { "from": "gpt", "value": "The answer is Yes, one white flower" } ], "clip_score": 32.25, "loss": 3.8671770095825195 }, { "id": "identity_310324", "image": "PMC-VQA/images/PMC5884212_fig5_297237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the colors red, green, yellow, and blue represent in the image?\nChoices:\n(A) Brightness level of FLAIR image\n(B) Blood flow rate\n(C) Tumor size\n(D) Types of tissue found in the image." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.515625, "loss": 4.728598594665527 }, { "id": "identity_8984", "image": "VQA2.0/images/48759.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who crosses the street?" }, { "from": "gpt", "value": "The answer is oman" } ], "clip_score": 18.203125, "loss": 5.123161792755127 }, { "id": "identity_161084", "image": "GeoQA+/images/1713.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if AB is the diameter of circle O and ∠CDB = 25°, what is the measure of ∠ODB?" }, { "from": "gpt", "value": "The answer is 50°" } ], "clip_score": 34.09375, "loss": 1.9713518619537354 }, { "id": "identity_332004", "image": "Super-CLEVR/images/superCLEVR_new_001478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the thing that is to the right of the small shiny tandem bike and in front of the green metallic motorbike?" }, { "from": "gpt", "value": "The answer is large" } ], "clip_score": 29.6875, "loss": 4.6062822341918945 }, { "id": "identity_89998", "image": "GeoQA+/images/5468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the shaded region?" }, { "from": "gpt", "value": "The answer is 25 cm^2" } ], "clip_score": 34.0625, "loss": 1.7702564001083374 }, { "id": "identity_9444", "image": "TQA/images/question_images/lewis_dot_diagrams_9135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: According to the diagram, how protons does the Hydrogen atom have?\nChoices:\n(A) 2\n(B) 1\n(C) 3\n(D) 4" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.078125, "loss": 2.879117250442505 }, { "id": "identity_64681", "image": "TQA/images/question_images/cycle_rock_6733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process leads to the formation of metamorphic rocks from sedimentary rocks?" }, { "from": "gpt", "value": "The answer is Heat, pressure, and stress" } ], "clip_score": 27.390625, "loss": 1.7675673961639404 }, { "id": "identity_98325", "image": "TabMWP/images/3622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much would it cost to buy one of each item on the menu?" }, { "from": "gpt", "value": "The answer is $13.52" } ], "clip_score": 25.171875, "loss": 1.8117666244506836 }, { "id": "identity_274665", "image": "A-OKVQA/images/94307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which item, resembling an orange, is located near the bottom of the pile?\nChoices:\n(A) garfield doll\n(B) lime\n(C) carrot\n(D) thumb tack" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.4296875, "loss": 3.877406358718872 }, { "id": "identity_129354", "image": "VizWiz/images/VizWiz_train_00000017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the beads inside the jar?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 32.40625, "loss": 2.9060230255126953 }, { "id": "identity_259474", "image": "PMC-VQA/images/PMC5746931_nanomaterials-07-00441-f011_258491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure being examined in the image and describe the key histological features that help in identifying it as lung epithelium" }, { "from": "gpt", "value": "The answer is Lung epithelium with alveolar sacs and bronchiole" } ], "clip_score": 26.9375, "loss": 2.216911792755127 }, { "id": "identity_171571", "image": "IconQA/images/34808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line represents a stick and it is broken into two equal parts, what would be the length of each part in centimeters?" }, { "from": "gpt", "value": "The answer is 3.95 centimeters" } ], "clip_score": 31.21875, "loss": 1.9412461519241333 }, { "id": "identity_215080", "image": "VizWiz/images/VizWiz_train_00003741.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What feature of the product is highlighted at the top of the label?" }, { "from": "gpt", "value": "The answer is Clinical performance stick" } ], "clip_score": 28.1875, "loss": 2.9354875087738037 }, { "id": "identity_200562", "image": "PMC-VQA/images/PMC6907138_Fig1_82610.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the visible abnormality in the MRI image?" }, { "from": "gpt", "value": "The answer is Hyperintensity in the left hemisphere" } ], "clip_score": 35.65625, "loss": 2.7922000885009766 }, { "id": "identity_12389", "image": "A-OKVQA/images/382104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Ignoring everything else about the image what should pedestrians do about crossing the street according to the traffic light?\nChoices:\n(A) go around\n(B) wait\n(C) cross\n(D) give up" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.6796875, "loss": 3.399024486541748 }, { "id": "identity_139076", "image": "FigureQA/images/47152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors have a smaller portion than Medium Purple?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.71875, "loss": 2.4180686473846436 }, { "id": "identity_284710", "image": "ChartQA/images/2522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of Americans support the legalization of marijuana for both medical and recreational purposes?" }, { "from": "gpt", "value": "The answer is 0.59" } ], "clip_score": 35.78125, "loss": 2.5418803691864014 }, { "id": "identity_173857", "image": "IconQA/images/69597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern of the toy sequence?" }, { "from": "gpt", "value": "The answer is Plane, teddy bear, train, block" } ], "clip_score": 30.375, "loss": 3.464080810546875 }, { "id": "identity_143140", "image": "DocVQA/images/rmnp0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date when this letter was written?" }, { "from": "gpt", "value": "The answer is January 30, 1976" } ], "clip_score": 26.09375, "loss": 1.7563856840133667 }, { "id": "identity_289032", "image": "A-OKVQA/images/412908.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of store is this?\nOptions:\n(A) body\n(B) gift\n(C) hair\n(D) auto" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.390625, "loss": 4.191986083984375 }, { "id": "identity_127499", "image": "A-OKVQA/images/343185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of batteries are in the package?" }, { "from": "gpt", "value": "The answer is AA batteries" } ], "clip_score": 15.5859375, "loss": 2.762883424758911 }, { "id": "identity_42469", "image": "FigureQA/images/48643.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dominant color in the pie chart?" }, { "from": "gpt", "value": "The answer is Light Gold" } ], "clip_score": 37.84375, "loss": 3.3053019046783447 }, { "id": "identity_152432", "image": "ScienceQA/images/2937.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different genotypes are possible for the offspring?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.203125, "loss": 2.318617105484009 }, { "id": "identity_81853", "image": "UniGeo/images/733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of chord CD in the circle O?" }, { "from": "gpt", "value": "The answer is Length of CD is equal to the radius of the circle O" } ], "clip_score": 35.46875, "loss": 1.975061297416687 }, { "id": "identity_227225", "image": "Super-CLEVR/images/superCLEVR_new_001511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object that is to the left of the small blue metallic airplane and behind the purple bus?" }, { "from": "gpt", "value": "The answer is gray" } ], "clip_score": 27.4375, "loss": 4.355593204498291 }, { "id": "identity_309676", "image": "DocVQA/images/xfjv0228_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the provided Office Phone No?" }, { "from": "gpt", "value": "The answer is 676-3331" } ], "clip_score": 23.921875, "loss": 2.4139750003814697 }, { "id": "identity_50788", "image": "FigureQA/images/87549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value for Medium Blue at any point on the x-axis?" }, { "from": "gpt", "value": "The answer is Approximately 40" } ], "clip_score": 25.65625, "loss": 2.62188458442688 }, { "id": "identity_21752", "image": "TabMWP/images/22838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Gabrielle kept a tally of the number of emails she received each day for a week. According to the table, what was the rate of change between Monday and Tuesday?" }, { "from": "gpt", "value": "The answer is -2" } ], "clip_score": 38.375, "loss": 2.7922146320343018 }, { "id": "identity_58555", "image": "DocVQA/images/llxb0228_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any fungicides registered for use on all cereal crops listed?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.046875, "loss": 2.4774372577667236 }, { "id": "identity_3081", "image": "VizWiz/images/VizWiz_train_00015956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is that?" }, { "from": "gpt", "value": "The answer is sanitary gel" } ], "clip_score": 26.984375, "loss": 3.0170252323150635 }, { "id": "identity_319941", "image": "Geometry3K/images/86.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, m \\angle 2 = 110 and m \\angle 12 = 55. Determine the measure of \\angle 2.\nChoices:\n(A) 55\n(B) 70\n(C) 80\n(D) 110" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.796875, "loss": 2.6695663928985596 }, { "id": "identity_34374", "image": "IconQA/images/84779.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) turn\n(B) slide\n(C) flip" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.296875, "loss": 4.475168704986572 }, { "id": "identity_289208", "image": "VQA2.0/images/572894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a fire hydrant present in this location?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.96875, "loss": 3.7101829051971436 }, { "id": "identity_276052", "image": "FigureQA/images/95784.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Violet Red the color with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.640625, "loss": 4.180392742156982 }, { "id": "identity_193164", "image": "A-OKVQA/images/199969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest priced fruit per piece displayed in the image?" }, { "from": "gpt", "value": "The answer is Pomegranates £1.45" } ], "clip_score": 21.484375, "loss": 2.3829329013824463 }, { "id": "identity_307180", "image": "FigureQA/images/73685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Cyan the one with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.53125, "loss": 4.568436622619629 }, { "id": "identity_48072", "image": "DVQA/images/bar_train_00138744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does 'meal' have a higher value for 'status' than 'blonde'?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 24.078125, "loss": 3.1306941509246826 }, { "id": "identity_296152", "image": "IconQA/images/83824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of dots?" }, { "from": "gpt", "value": "The answer is 19" } ], "clip_score": 25.375, "loss": 1.9762605428695679 }, { "id": "identity_45916", "image": "DocVQA/images/lzxh0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the equivalent square feet for 1 yard of 33 inch cloth?" }, { "from": "gpt", "value": "The answer is 1 square foot" } ], "clip_score": 11.8046875, "loss": 2.0731914043426514 }, { "id": "identity_86320", "image": "Geometry3K/images/1082.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diameter GJ if the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is 10 units" } ], "clip_score": 36.28125, "loss": 1.8867700099945068 }, { "id": "identity_283922", "image": "A-OKVQA/images/53975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: By what criteria is the waiting room segregated?\nChoices:\n(A) race\n(B) intelligence\n(C) gender\n(D) age" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.1796875, "loss": 4.5675368309021 }, { "id": "identity_218780", "image": "A-OKVQA/images/261316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bus route number?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 27.375, "loss": 3.0592267513275146 }, { "id": "identity_244270", "image": "IconQA/images/59393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many flowers are there in each row, and what is the total number of rows?" }, { "from": "gpt", "value": "The answer is 6 rows" } ], "clip_score": 34.6875, "loss": 3.4706625938415527 }, { "id": "identity_157826", "image": "ScienceQA/images/19004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the temperature change for the vial in the refrigerator?" }, { "from": "gpt", "value": "The answer is 1.5°C" } ], "clip_score": 33.1875, "loss": 1.746437430381775 }, { "id": "identity_15530", "image": "TQA/images/question_images/human_system_reproductory_7038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What covers the testis?\nChoices:\n(A) bladder\n(B) penis\n(C) epididymis\n(D) scrotum" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.640625, "loss": 3.872641086578369 }, { "id": "identity_174444", "image": "IconQA/images/44733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the pencil from the tip to the end of the eraser cap in centimeters?" }, { "from": "gpt", "value": "The answer is 7 centimeters" } ], "clip_score": 37.375, "loss": 1.7349077463150024 }, { "id": "identity_148781", "image": "DocVQA/images/qkvb0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the zip code for the location of The Nestle Company, Inc.?" }, { "from": "gpt", "value": "The answer is 10605" } ], "clip_score": 18.703125, "loss": 2.616450786590576 }, { "id": "identity_38737", "image": "DVQA/images/bar_train_00018790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bars have a value greater than 5?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.5, "loss": 2.5906667709350586 }, { "id": "identity_21878", "image": "DocVQA/images/jnxd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is Nutritional Characteristics of Aging Persons - An Overview" } ], "clip_score": 19.078125, "loss": 1.216798186302185 }, { "id": "identity_225066", "image": "UniGeo/images/1170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC为⊙O的内接三角形,AB为⊙O的直径,点D在⊙O上,∠ADC=65°,求∠BAC和∠ACD的和是多少度?" }, { "from": "gpt", "value": "The answer is 65°" } ], "clip_score": 29.703125, "loss": 2.0689804553985596 }, { "id": "identity_41318", "image": "FigureQA/images/46717.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Violet Red section larger than the combined sections of Medium Purple and Cornflower?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 40.0625, "loss": 3.2260372638702393 }, { "id": "identity_55953", "image": "DocVQA/images/hnfw0228_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the original invoice for the prepaid freight?" }, { "from": "gpt", "value": "The answer is 1985-00" } ], "clip_score": 25.609375, "loss": 3.2601592540740967 }, { "id": "identity_79350", "image": "UniGeo/images/586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point C is moved 2 meters closer to point B, what will be the new height AC?" }, { "from": "gpt", "value": "The answer is The new height AC will be approximately 107.14 cm" } ], "clip_score": 28.171875, "loss": 2.6391937732696533 }, { "id": "identity_71842", "image": "ScienceQA/images/14723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If we combine Solution A and Solution B, how many purple particles will be in the new solution?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 30.421875, "loss": 3.52449107170105 }, { "id": "identity_145005", "image": "FigureQA/images/47163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by Medium Turquoise on the x-axis?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 30.71875, "loss": 2.515650510787964 }, { "id": "identity_164113", "image": "GeoQA+/images/5321.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If CD⊥OA and CE⊥OB, and ∠DCE=80°, what is the measure of ∠OCE?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 30.828125, "loss": 1.562192678451538 }, { "id": "identity_118953", "image": "TextVQA/images/66e16785047c416a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bin in front of the wall?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 18.6875, "loss": 4.489922046661377 }, { "id": "identity_216410", "image": "A-OKVQA/images/513854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the people engaged in?" }, { "from": "gpt", "value": "The answer is Playing Ultimate Frisbee" } ], "clip_score": 30.453125, "loss": 2.0867655277252197 }, { "id": "identity_45863", "image": "DVQA/images/bar_train_00027207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the smallest total value when summing \"pond\", \"artist\", and \"minute\"?" }, { "from": "gpt", "value": "The answer is bullet" } ], "clip_score": 30.421875, "loss": 3.5994765758514404 }, { "id": "identity_304382", "image": "ChartQA/images/18143564004828.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the bar in the middle?" }, { "from": "gpt", "value": "The answer is Liberia" } ], "clip_score": 10.8359375, "loss": 3.5371599197387695 }, { "id": "identity_19914", "image": "TQA/images/question_images/parts_ocean_floor_9207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the area between coast and deep ocean basin called?\nChoices:\n(A) Deep sea fan\n(B) Continental margin\n(C) Volcanic island\n(D) Ridge" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.78125, "loss": 4.088377952575684 }, { "id": "identity_284344", "image": "TabMWP/images/9967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which event in the schedule starts at 6.00 P.M.?\nChoices:\n(A) theater auditions\n(B) choir rehearsal\n(C) orchestra rehearsal\n(D) swim team practice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.6875, "loss": 4.3632707595825195 }, { "id": "identity_302704", "image": "GEOS/images/062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, if x is less than 90 degrees, then which of the following must be true?\nChoices:\n(A) l is parallel to m\n(B) l is perpendicular to m\n(C) y is less than 90 degrees\n(D) y equals 90 degrees\n(E) y is greater than 90 degrees" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.21875, "loss": 4.169745445251465 }, { "id": "identity_87646", "image": "UniGeo/images/2509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle COD?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 30.9375, "loss": 2.115569591522217 }, { "id": "identity_250280", "image": "PMC-VQA/images/PMC5143360_Fig3_103322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relative position of the superior mesenteric artery (SMA) to both the left renal vein and the aorta, and what is the significance of this anatomical relationship in terms of potential clinical implications?" }, { "from": "gpt", "value": "The answer is SMA is above the left renal vein and branches from aorta" } ], "clip_score": 29.203125, "loss": 1.9450544118881226 }, { "id": "identity_70296", "image": "TQA/images/abc_question_images/human_system_circulatory_13642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does label R represent in the heart diagram?" }, { "from": "gpt", "value": "The answer is Right ventricle" } ], "clip_score": 34.65625, "loss": 2.357861042022705 }, { "id": "identity_256489", "image": "IconQA/images/5879.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of horses to saddles in the image?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 29.375, "loss": 1.579317331314087 }, { "id": "identity_261929", "image": "IconQA/images/85511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of squares present on the frame?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.75, "loss": 2.303201913833618 }, { "id": "identity_15850", "image": "FigureQA/images/5521.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Seafoam the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.296875, "loss": 3.8122544288635254 }, { "id": "identity_74387", "image": "TQA/images/question_images/aquifers_6971.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the time scale associated with the unconfined aquifer?" }, { "from": "gpt", "value": "The answer is Years" } ], "clip_score": 29.203125, "loss": 3.8424072265625 }, { "id": "identity_123122", "image": "PMC-VQA/images/PMC2213640_F1_16654.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate diameter of the largest circular structure in the image?" }, { "from": "gpt", "value": "The answer is Estimate the diameter in micrometers based on the scale bar" } ], "clip_score": 15.5234375, "loss": 2.460702657699585 }, { "id": "identity_239365", "image": "VQA2.0/images/160848.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the boat, and how many other boats are visible in the background?" }, { "from": "gpt", "value": "The answer is Lady Joanne III, 5 boats" } ], "clip_score": 25.625, "loss": 4.156891345977783 }, { "id": "identity_163973", "image": "GeoQA+/images/5517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle, if angle B is 30 degrees and angle C is 60 degrees, and the length of AC is 3, what is the length of the chord BC?" }, { "from": "gpt", "value": "The answer is 1.5" } ], "clip_score": 35.96875, "loss": 1.3536580801010132 }, { "id": "identity_310902", "image": "FigureQA/images/91907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the one with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.015625, "loss": 4.14390230178833 }, { "id": "identity_317656", "image": "UniGeo/images/1208.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB为⊙O的直径,C为⊙O上一点,AD和过点C的切线互相垂直,垂足为D。若∠DCA=55°,则∠CAO的度数是多少?\n选择:\n(A) 25°\n(B) 35°\n(C) 45°\n(D) 55°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.375, "loss": 2.653916120529175 }, { "id": "identity_2783", "image": "DocVQA/images/lfgn0226_46.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the new remote supervisor ID given in the form?" }, { "from": "gpt", "value": "The answer is 148294" } ], "clip_score": 23.21875, "loss": 2.790823221206665 }, { "id": "identity_246368", "image": "DVQA/images/bar_train_00119277.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the sum of the accuracies of the algorithms widow and visit in the dataset disk and the sum of the accuracies of the same algorithms in the dataset half?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 39.0625, "loss": 2.702423334121704 }, { "id": "identity_299989", "image": "PMC-VQA/images/PMC8102820_f1_435049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part of the brain exhibited patchy lesions among the following options: Temporal lobe, Cerebellum, Medulla oblongata, or Occipital lobe?\nChoices:\n(A) Temporal lobe \n(B) Cerebellum \n(C) Medulla oblongata \n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.96875, "loss": 4.330057621002197 }, { "id": "identity_121291", "image": "VQA-RAD/images/synpic24729.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any renal abnormalities in this image?" }, { "from": "gpt", "value": "The answer is No renal abnormalities identified" } ], "clip_score": 30.21875, "loss": 2.4519851207733154 }, { "id": "identity_123899", "image": "PMC-VQA/images/PMC4455327_Fig4_393478.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used in this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 30.6875, "loss": 2.832195997238159 }, { "id": "identity_223570", "image": "TextVQA/images/816c3822922356fe.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the product in the green bottle?" }, { "from": "gpt", "value": "The answer is Loux" } ], "clip_score": 22.59375, "loss": 5.223606109619141 }, { "id": "identity_210153", "image": "VQA2.0/images/70794.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any man-made structures be seen in the image?" }, { "from": "gpt", "value": "The answer is No man-made structures are visible" } ], "clip_score": 19.734375, "loss": 2.65206241607666 }, { "id": "identity_289788", "image": "A-OKVQA/images/14472.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the blue item most likely to be used? \n(A) Rwanda \n(B) London \n(C) Cameroon \n(D) Egypt" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.6171875, "loss": 4.678105354309082 }, { "id": "identity_61133", "image": "ChartQA/images/multi_col_20758.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest forecasted spending value in billions of dollars according to the October 2019 forecast?" }, { "from": "gpt", "value": "The answer is 865.12" } ], "clip_score": 31.640625, "loss": 2.3943581581115723 }, { "id": "identity_290274", "image": "PMC-VQA/images/PMC7906016_F3_374095.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging sequence was utilized to capture images (b-d) out of the following options: T2 sequence, DWI sequence, T1 sequence with contrast-enhancing, or T1 sequence without contrast-enhancing?\nChoices:\n(A) T2 sequence \n(B) DWI sequence \n(C) T1 sequence with contrast-enhancing \n(D) T1 sequence without contrast-enhancing." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.578125, "loss": 3.4592132568359375 }, { "id": "identity_160353", "image": "Geometry3K/images/1289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side y in the triangle?" }, { "from": "gpt", "value": "The answer is y = √65 or approximately 8.06" } ], "clip_score": 35.1875, "loss": 2.3549084663391113 }, { "id": "identity_266315", "image": "GEOS/images/028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the circle shown on the right, two tangents are drawn from a common external point. What is the measure of the angle denoted by x?\nChoices:\n(A) 60\n(B) 80\n(C) 85\n(D) 130" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.671875, "loss": 3.219287872314453 }, { "id": "identity_316509", "image": "TextVQA/images/5a35ae8804f9177a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What product is written under the word \"D1\"?" }, { "from": "gpt", "value": "The answer is coffee" } ], "clip_score": 19.890625, "loss": 3.5862247943878174 }, { "id": "identity_24438", "image": "PMC-VQA/images/PMC7602118_diagnostics-10-00812-f001_288154.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging studies were performed post-surgery?\nChoices:\n(A) CT scan \n(B) MRI \n(C) AP and lateral radiographs \n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.359375, "loss": 3.646996259689331 }, { "id": "identity_326574", "image": "TabMWP/images/32179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A city's school board compared the student attendance at each high school. What is the difference in the number of students between Riverside High and Truman High?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 37.1875, "loss": 2.7372887134552 }, { "id": "identity_179877", "image": "TabMWP/images/23701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much quantity is supplied at the lowest price?" }, { "from": "gpt", "value": "The answer is 3,000" } ], "clip_score": 27.28125, "loss": 1.5218437910079956 }, { "id": "identity_227553", "image": "DVQA/images/bar_train_00148644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the silver element across all categories?" }, { "from": "gpt", "value": "The answer is 62.86" } ], "clip_score": 29.1875, "loss": 2.2730817794799805 }, { "id": "identity_111917", "image": "TextVQA/images/5c30034c0876878e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of lighting is creating the shadow on the phone?" }, { "from": "gpt", "value": "The answer is Natural sunlight" } ], "clip_score": 20.1875, "loss": 2.9436745643615723 }, { "id": "identity_335056", "image": "IconQA/images/31805.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Adjust the ruler to find the length of the line to the nearest centimeter. The line is approximately (_) centimeters long." }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 32.90625, "loss": 2.637023687362671 }, { "id": "identity_105079", "image": "Super-CLEVR/images/superCLEVR_new_001957.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the smallest airplane in the image?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 22.78125, "loss": 3.100478410720825 }, { "id": "identity_316276", "image": "ScienceQA/images/14860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this a mineral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.90625, "loss": 3.7807796001434326 }, { "id": "identity_7179", "image": "DocVQA/images/zmwm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the date mentioned in the given letter ?" }, { "from": "gpt", "value": "The answer is November 27, 1973" } ], "clip_score": 23.484375, "loss": 1.7856025695800781 }, { "id": "identity_297279", "image": "PMC-VQA/images/PMC7410867_Fig2_229887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is indicated by the small short arrows in the MR images?\nChoices:\n(A) Internal necrosis \n(B) Marked enhancement \n(C) Subcutaneous soft tissue thickening \n(D) Internal bleeding" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.984375, "loss": 4.682956695556641 }, { "id": "identity_73863", "image": "TQA/images/question_images/photosynthesis_4130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the role of the sunlight in the image?" }, { "from": "gpt", "value": "The answer is Provides energy for photosynthesis" } ], "clip_score": 32.25, "loss": 2.735518455505371 }, { "id": "identity_279734", "image": "CLEVR-Math/images/CLEVR_train_001773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you subtract 1 cylinder, how many cylinders remain?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 18.953125, "loss": 2.368727445602417 }, { "id": "identity_319161", "image": "VQA-RAD/images/synpic42157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a cystic or solid structure?" }, { "from": "gpt", "value": "The answer is Cystic" } ], "clip_score": 27.546875, "loss": 1.810198187828064 }, { "id": "identity_249210", "image": "GeoQA+/images/2000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在菱形ABCD中,AB=5,BD=6,DE⊥AB于点E,求菱形ABCD的面积。" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 29.359375, "loss": 2.4549050331115723 }, { "id": "identity_338628", "image": "TabMWP/images/14232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A veterinarian in Booneville treats various kinds of animals each year. What fraction of the treated pets were birds? Simplify your answer." }, { "from": "gpt", "value": "The answer is 5/19" } ], "clip_score": 33.8125, "loss": 2.1815483570098877 }, { "id": "identity_308181", "image": "GeoQA+/images/4014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,把一个长方形纸条沿某条线折叠,已知∠ADB=32°,AE∥BD,则∠DAF为()\nChoices:\n(A) 30°\n(B) 28°\n(C) 29°\n(D) 26°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.0, "loss": 2.3599066734313965 }, { "id": "identity_287192", "image": "VQA2.0/images/226939.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the shirt worn by the baby?" }, { "from": "gpt", "value": "The answer is green" } ], "clip_score": 19.375, "loss": 3.980501890182495 }, { "id": "identity_143691", "image": "ChartQA/images/two_col_20407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of values from 2001 to 2018 on the line graph?" }, { "from": "gpt", "value": "The answer is 25428.6 million tonnes" } ], "clip_score": 29.609375, "loss": 2.9157462120056152 }, { "id": "identity_311039", "image": "A-OKVQA/images/399942.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this most likely?\nChoices:\n(A) gabon\n(B) france\n(C) egypt\n(D) japan" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 13.7578125, "loss": 3.8970065116882324 }, { "id": "identity_295478", "image": "TabMWP/images/31151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the most frequently occurring number of bags of popcorn sold at the concession stand in the movie theater over the past 7 days?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 39.28125, "loss": 1.9999531507492065 }, { "id": "identity_117952", "image": "TextVQA/images/131553640bb6bcee.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the machinery to the left in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.8984375, "loss": 3.6459758281707764 }, { "id": "identity_11757", "image": "FigureQA/images/70043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Firebrick the roughest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.8984375, "loss": 3.651923418045044 }, { "id": "identity_279547", "image": "AI2D/images/4804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following figures displays a branch with no leaves at all?\nChoices:\n(A) E\n(B) B\n(C) F\n(D) A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.8125, "loss": 3.623697280883789 }, { "id": "identity_191621", "image": "PMC-VQA/images/PMC7830087_diagnostics-11-00129-f001_351373.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely purpose of combining the two types of scans in this image?" }, { "from": "gpt", "value": "The answer is To localize metabolic abnormalities with anatomical precision" } ], "clip_score": 30.78125, "loss": 3.555155038833618 }, { "id": "identity_54290", "image": "ChartQA/images/10286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people in Asia/Pacific who believe children will be financially better off than their parents?" }, { "from": "gpt", "value": "The answer is 51%" } ], "clip_score": 28.875, "loss": 2.0129220485687256 }, { "id": "identity_230997", "image": "FigureQA/images/92853.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the maximum area under the curve, and what is the approximate area value?" }, { "from": "gpt", "value": "The answer is Light Slate, approximately 3500 square units" } ], "clip_score": 23.28125, "loss": 2.734497547149658 }, { "id": "identity_115571", "image": "VQA-RAD/images/synpic35191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is visible in the center of the image?" }, { "from": "gpt", "value": "The answer is Cerebellum" } ], "clip_score": 27.609375, "loss": 2.5441062450408936 }, { "id": "identity_104248", "image": "PMC-VQA/images/PMC3535732_fig2_176722.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What bone is directly above the fixation device?" }, { "from": "gpt", "value": "The answer is Femur" } ], "clip_score": 29.203125, "loss": 3.728416681289673 }, { "id": "identity_52873", "image": "DVQA/images/bar_train_00145022.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value associated with 'dealer'?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 23.765625, "loss": 2.662294864654541 }, { "id": "identity_115956", "image": "VQA2.0/images/4706.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible man-made structures are in the background?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 15.6484375, "loss": 2.6765873432159424 }, { "id": "identity_33633", "image": "MapQA/images/map_10744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the legend a continuous bar ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 12.921875, "loss": 3.2420356273651123 }, { "id": "identity_102138", "image": "IconQA/images/105318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Adam and Rosa set their clocks at 6:00 yesterday, how many hours and minutes have passed until the current time shown on their clocks?" }, { "from": "gpt", "value": "The answer is Adam 1 hour 10 minutes, Rosa 1 hour 25 minutes" } ], "clip_score": 21.375, "loss": 1.6037323474884033 }, { "id": "identity_307182", "image": "ChartQA/images/two_col_24292.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio between some reasons and a different approach?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.9375, "loss": 3.4633262157440186 }, { "id": "identity_259382", "image": "TextVQA/images/da147a39299fe540.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the identification on the plane to the left of the silver plane with the identification \"G-A\"?" }, { "from": "gpt", "value": "The answer is G-AEXF" } ], "clip_score": 21.09375, "loss": 2.19539737701416 }, { "id": "identity_335521", "image": "A-OKVQA/images/407528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which places have the most palm trees?\nChoices:\n(A) arizona\n(B) california\n(C) texas\n(D) hawaii" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 11.140625, "loss": 3.7275092601776123 }, { "id": "identity_174079", "image": "IconQA/images/45248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of one side of the triangle?" }, { "from": "gpt", "value": "The answer is 4 yd" } ], "clip_score": 33.625, "loss": 2.0716283321380615 }, { "id": "identity_28096", "image": "IconQA/images/94658.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Bill's afternoon picnic is almost over. His watch shows the time. What time is it?\nChoices:\n(A) 2:00 A.M.\n(B) 2:00 P.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.5625, "loss": 3.4967079162597656 }, { "id": "identity_150004", "image": "DocVQA/images/lqxd0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is the letter addressed?" }, { "from": "gpt", "value": "The answer is Robert E. Shank, M.D" } ], "clip_score": 26.578125, "loss": 2.155458927154541 }, { "id": "identity_232835", "image": "UniGeo/images/2516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,⊙O是△ABC的外接圆,若∠AOB=100°,且∠BAC=30°,则∠ACB的度数是()" }, { "from": "gpt", "value": "The answer is 100°" } ], "clip_score": 32.8125, "loss": 1.8591376543045044 }, { "id": "identity_291977", "image": "Geometry3K/images/1319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x, rounded to the nearest tenth, if line AB is tangent to the circle. \nOptions:\n(A) 5.1\n(B) 6.1\n(C) 7.1\n(D) 8.1" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.8125, "loss": 2.1532530784606934 }, { "id": "identity_138038", "image": "DVQA/images/bar_train_00194325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between 'goal' and 'taxi' for 'dream'?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 31.140625, "loss": 2.5668253898620605 }, { "id": "identity_220030", "image": "TextVQA/images/8cae6fdeca55a98f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many app icons are visible on the middle phone's screen?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 21.203125, "loss": 2.3003857135772705 }, { "id": "identity_56383", "image": "DVQA/images/bar_train_00097201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of the bar labeled \"luxury\" greater than the bar labeled \"door\"?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.921875, "loss": 3.4494452476501465 }, { "id": "identity_199212", "image": "PMC-VQA/images/PMC7457088_rcr2646-fig-0002_242503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of consolidation in the lung fields?" }, { "from": "gpt", "value": "The answer is No signs of consolidation visible" } ], "clip_score": 28.90625, "loss": 2.9081761837005615 }, { "id": "identity_176756", "image": "TabMWP/images/30872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total cost of buying one of each item listed?" }, { "from": "gpt", "value": "The answer is $20,642" } ], "clip_score": 27.734375, "loss": 2.442924976348877 }, { "id": "identity_105863", "image": "VQA-RAD/images/synpic45914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of midline shift in this image?" }, { "from": "gpt", "value": "The answer is No midline shift" } ], "clip_score": 22.75, "loss": 2.9774065017700195 }, { "id": "identity_23839", "image": "A-OKVQA/images/111406.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What city is this?\nChoices:\n(A) portland\n(B) chicago\n(C) ny\n(D) boston" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.921875, "loss": 3.8420958518981934 }, { "id": "identity_214463", "image": "TextVQA/images/14376b594386ca6c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the field shown in the image?" }, { "from": "gpt", "value": "The answer is Tradition Field" } ], "clip_score": 28.546875, "loss": 2.32719349861145 }, { "id": "identity_125728", "image": "PMC-VQA/images/PMC4656243_Fig21_446782.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure indicated by the solid white arrow?" }, { "from": "gpt", "value": "The answer is Gallbladder" } ], "clip_score": 29.484375, "loss": 2.8269248008728027 }, { "id": "identity_107208", "image": "VQA2.0/images/212838.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the person in the process of opening the can?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 13.7109375, "loss": 2.934264898300171 }, { "id": "identity_317212", "image": "FigureQA/images/56175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.25, "loss": 4.332474231719971 }, { "id": "identity_192944", "image": "VQA2.0/images/11231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man on the right holding in his hand?" }, { "from": "gpt", "value": "The answer is A rope" } ], "clip_score": 14.625, "loss": 2.97301983833313 }, { "id": "identity_184939", "image": "A-OKVQA/images/62915.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What gate number is indicated near the jet bridge?" }, { "from": "gpt", "value": "The answer is C25" } ], "clip_score": 20.28125, "loss": 2.3129842281341553 }, { "id": "identity_236333", "image": "UniGeo/images/2336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在边长为9的正三角形ABC中,BD=3,∠ADE=60°,求DE的长和AE的长之和是多少?" }, { "from": "gpt", "value": "The answer is (7√3 + 21)/3" } ], "clip_score": 31.1875, "loss": 2.387805461883545 }, { "id": "identity_129284", "image": "Super-CLEVR/images/superCLEVR_new_001522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bus in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 12.359375, "loss": 3.859661102294922 }, { "id": "identity_24152", "image": "A-OKVQA/images/106375.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why does he have a bike on the back of his vehicle?\nChoices:\n(A) transporting it\n(B) stealing it\n(C) found it\n(D) selling it" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.21875, "loss": 3.977285861968994 }, { "id": "identity_335791", "image": "AI2D/images/1474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is meant by Lava?\nChoices:\n(A) Molten rock that reaches the earth's surface through a volcano or fissure.\n(B) A visible body of very fine water droplets or ice particles suspended in the atmosphere at altitudes ranging up to several miles above sea level.\n(C) To move or run smoothly with unbroken continuity, as in the manner characteristic of a fluid.\n(D) A dense flow of volcanic ash, dust, rocks and debris that cascades at high speed down the slope of a volcano during an eruption." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.15625, "loss": 3.5585923194885254 }, { "id": "identity_200071", "image": "A-OKVQA/images/15827.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in this image?" }, { "from": "gpt", "value": "The answer is Overcast or cool" } ], "clip_score": 18.28125, "loss": 3.714186906814575 }, { "id": "identity_193917", "image": "PMC-VQA/images/PMC2564839_hrt-94-05-0566-f01_28570.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the diameter of the vessel at the location of the red arrows?" }, { "from": "gpt", "value": "The answer is 0.15 mm" } ], "clip_score": 22.90625, "loss": 1.691637396812439 }, { "id": "identity_145197", "image": "DVQA/images/bar_train_00046866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of people who preferred 'dark' category between 'crisis' and 'excess' objects?" }, { "from": "gpt", "value": "The answer is 6 - 2 = 4" } ], "clip_score": 29.09375, "loss": 2.065857172012329 }, { "id": "identity_120730", "image": "PMC-VQA/images/PMC8913740_Fig2_220124.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What might be the reason for the dark area in the center of the image?" }, { "from": "gpt", "value": "The answer is Blockage or stenosis" } ], "clip_score": 21.734375, "loss": 3.6488494873046875 }, { "id": "identity_190276", "image": "PMC-VQA/images/PMC4955135_Fig2_53494.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is labeled with \"RPH\" in the image?" }, { "from": "gpt", "value": "The answer is Right Parahippocampal Gyrus" } ], "clip_score": 26.234375, "loss": 2.339120388031006 }, { "id": "identity_191788", "image": "A-OKVQA/images/126246.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any trees visible in the background of the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 14.5859375, "loss": 3.404263973236084 }, { "id": "identity_333439", "image": "DocVQA/images/nqnn0081_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total share of Plan III brand styles?" }, { "from": "gpt", "value": "The answer is 6.5%" } ], "clip_score": 22.75, "loss": 1.7142624855041504 }, { "id": "identity_49519", "image": "FigureQA/images/74153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum value of Blue Violet?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 20.6875, "loss": 3.002413034439087 }, { "id": "identity_33257", "image": "DocVQA/images/yqld0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the zipcode of Lloyd Kasper ?" }, { "from": "gpt", "value": "The answer is 03755" } ], "clip_score": 13.2578125, "loss": 1.8678061962127686 }, { "id": "identity_230575", "image": "TQA/images/question_images/human_system_digestive_1395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sequence of organs that food passes through starting from the esophagus and ending at the anus?" }, { "from": "gpt", "value": "The answer is Esophagus, stomach, small intestine, large intestine, rectum, anus" } ], "clip_score": 34.21875, "loss": 0.882486879825592 }, { "id": "identity_135498", "image": "DocVQA/images/mtnk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the assignee company?" }, { "from": "gpt", "value": "The answer is The Procter & Gamble Company" } ], "clip_score": 17.828125, "loss": 1.433658480644226 }, { "id": "identity_32071", "image": "IconQA/images/96170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Dan's grandma is reading a book one evening. The clock shows the time. What time is it?\nChoices:\n(A) 8:00 P.M.\n(B) 8:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.234375, "loss": 3.621641159057617 }, { "id": "identity_49560", "image": "DocVQA/images/lpdl0226_37.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between men and women in the age group 35 - 44?" }, { "from": "gpt", "value": "The answer is 16.67%" } ], "clip_score": 24.890625, "loss": 1.4577820301055908 }, { "id": "identity_29390", "image": "PMC-VQA/images/PMC8008854_f1_406334.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the red arrow indicate in the image? \nChoices:\n(A) Absence of air in the mediastinum \n(B) Presence of air in the mediastinum \n(C) Presence of fluid in the mediastinum \n(D) Enlargement of the heart. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.15625, "loss": 4.607178688049316 }, { "id": "identity_122997", "image": "TextVQA/images/d68e5a8505f697cb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.40625, "loss": 4.372838020324707 }, { "id": "identity_152091", "image": "ScienceQA/images/2836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in the map?" }, { "from": "gpt", "value": "The answer is North Carolina" } ], "clip_score": 32.8125, "loss": 2.2532598972320557 }, { "id": "identity_289106", "image": "PlotQA/images/62095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of legend labels?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 17.546875, "loss": 2.3977086544036865 }, { "id": "identity_96820", "image": "TabMWP/images/25711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Derek gives 25 coins to Allie, how many coins will each have?" }, { "from": "gpt", "value": "The answer is Derek: 950, Allie: 977" } ], "clip_score": 27.578125, "loss": 1.7932419776916504 }, { "id": "identity_268775", "image": "PlotQA/images/93112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of distinct colored lines available?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 16.328125, "loss": 2.3169772624969482 }, { "id": "identity_34713", "image": "GeoQA+/images/2139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,利用标杆BC测量旗杆EF的高度,标杆BC长为1.2米,tanA=\\frac{3}{4},BF=8.4米,则楼高EF是()米.\nChoices:\n(A) 6.3\n(B) 7.5\n(C) 8\n(D) 6.5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.46875, "loss": 2.6341357231140137 }, { "id": "identity_289281", "image": "A-OKVQA/images/219558.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which country does the parent company of this telephone brand originate?\nChoices:\n(A) Finland\n(B) Germany\n(C) Japan\n(D) United States" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.25, "loss": 4.885775566101074 }, { "id": "identity_40149", "image": "DocVQA/images/snyc0227_307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the acceptable range of Hemoglobin for nonpregnant, nonlactating females aged ≥13 years?" }, { "from": "gpt", "value": "The answer is 11.0-14.4" } ], "clip_score": 20.09375, "loss": 1.652725100517273 }, { "id": "identity_19538", "image": "IconQA/images/97942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does this picture have symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.140625, "loss": 3.4737472534179688 }, { "id": "identity_114663", "image": "VQA-RAD/images/synpic51926.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality was used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 31.265625, "loss": 2.230970621109009 }, { "id": "identity_137863", "image": "FigureQA/images/12066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of all the colors represented on the graph?" }, { "from": "gpt", "value": "The answer is 34" } ], "clip_score": 27.953125, "loss": 2.4030165672302246 }, { "id": "identity_26297", "image": "GeoQA+/images/5599.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在Rt△ABC中,∠ACB=90°,若AB=13,则正方形ADEC与正方形BCFG的面积之和为()\nChoices:\n(A) 25\n(B) 144\n(C) 150\n(D) 169" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.84375, "loss": 2.398296594619751 }, { "id": "identity_226252", "image": "A-OKVQA/images/103393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is between the bicycle and the truck in the image?" }, { "from": "gpt", "value": "The answer is bus" } ], "clip_score": 28.171875, "loss": 3.4751274585723877 }, { "id": "identity_73538", "image": "ScienceQA/images/5293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state located at the northeastern corner of the United States?" }, { "from": "gpt", "value": "The answer is Maine" } ], "clip_score": 29.03125, "loss": 2.821742296218872 }, { "id": "identity_105799", "image": "VizWiz/images/VizWiz_train_00001773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the glass made of?" }, { "from": "gpt", "value": "The answer is Crystal" } ], "clip_score": 26.40625, "loss": 2.950387954711914 }, { "id": "identity_6182", "image": "TQA/images/question_images/dna_6760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the term for connected sugar, phosphate group and protein?\nChoices:\n(A) hydrogen bond\n(B) deoxyribose\n(C) nucleotide\n(D) sugar-phosphate backbone" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.828125, "loss": 4.119006156921387 }, { "id": "identity_22147", "image": "TQA/images/question_images/earth_eclipses_1657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the shadow cast by the moon?\nChoices:\n(A) Silhouette\n(B) Antumbra\n(C) Umbra\n(D) Penumbra" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.59375, "loss": 4.211030006408691 }, { "id": "identity_167502", "image": "TabMWP/images/8933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of superheroes listed in the class's compilation?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 19.140625, "loss": 2.1400182247161865 }, { "id": "identity_61313", "image": "DocVQA/images/fgbg0024_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest purse value for a single event?" }, { "from": "gpt", "value": "The answer is $75,000" } ], "clip_score": 19.03125, "loss": 1.7071019411087036 }, { "id": "identity_329622", "image": "DocVQA/images/rtxm0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned on this page?" }, { "from": "gpt", "value": "The answer is Friday, November 24-1972" } ], "clip_score": 21.546875, "loss": 2.821854591369629 }, { "id": "identity_45693", "image": "DVQA/images/bar_train_00117485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in accuracy between the algorithms lock and honey?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 39.84375, "loss": 2.810009002685547 }, { "id": "identity_185710", "image": "PMC-VQA/images/PMC7440628_pone.0237101.g005_238410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique was likely used to produce the colors in this image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and eosin (H&E) staining" } ], "clip_score": 28.875, "loss": 0.9943524599075317 }, { "id": "identity_99606", "image": "TabMWP/images/5532.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many necklaces have more than 30 beads?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 33.75, "loss": 2.686084032058716 }, { "id": "identity_143985", "image": "FigureQA/images/97235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Violet Red have a peak value greater than 64?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.421875, "loss": 3.0551846027374268 }, { "id": "identity_281525", "image": "DVQA/images/bar_train_00046743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By what margin is the most accurate algorithm superior to the least accurate algorithm in terms of accuracy?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 35.125, "loss": 2.6969821453094482 }, { "id": "identity_212478", "image": "VQA2.0/images/4239.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person engaged in?" }, { "from": "gpt", "value": "The answer is Snowboarding" } ], "clip_score": 28.671875, "loss": 2.621119976043701 }, { "id": "identity_168876", "image": "IconQA/images/56409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sides for all shapes combined?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 27.125, "loss": 1.7096854448318481 }, { "id": "identity_226255", "image": "FigureQA/images/14153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the Crimson and Turquoise sections?" }, { "from": "gpt", "value": "The answer is 50%" } ], "clip_score": 31.34375, "loss": 1.7307919263839722 }, { "id": "identity_115835", "image": "TextVQA/images/2e4137a7899f109e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition during the game?" }, { "from": "gpt", "value": "The answer is Overcast or cloudy" } ], "clip_score": 13.5625, "loss": 2.96589732170105 }, { "id": "identity_149103", "image": "FigureQA/images/9610.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Rosy Brown?" }, { "from": "gpt", "value": "The answer is Approximately 20" } ], "clip_score": 21.484375, "loss": 2.913655996322632 }, { "id": "identity_170945", "image": "IconQA/images/57164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more plates than spoons?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 36.0, "loss": 3.1907315254211426 }, { "id": "identity_156982", "image": "TQA/images/question_images/photosynthesis_1316.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process do the arrows labeled \"carbon dioxide\" and \"oxygen\" represent in the diagram?" }, { "from": "gpt", "value": "The answer is Gas exchange in photosynthesis" } ], "clip_score": 31.6875, "loss": 3.2482502460479736 }, { "id": "identity_285511", "image": "TabMWP/images/19428.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money did Toby spend on 3 pounds of long grain rice at the store?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.65625, "loss": 3.5609188079833984 }, { "id": "identity_242590", "image": "TabMWP/images/29629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which movie starts immediately after the movie that ends at 8:25 P.M.?" }, { "from": "gpt", "value": "The answer is none" } ], "clip_score": 28.9375, "loss": 4.882750034332275 }, { "id": "identity_116723", "image": "PMC-VQA/images/PMC8554992_Fig2_99291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the dark triangular structure in the center of the image?" }, { "from": "gpt", "value": "The answer is Meniscus" } ], "clip_score": 27.109375, "loss": 3.808743715286255 }, { "id": "identity_249393", "image": "DVQA/images/bar_train_00092835.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of the algorithms with accuracies lower than 6?" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 30.703125, "loss": 1.586167812347412 }, { "id": "identity_223551", "image": "A-OKVQA/images/210728.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation is most visible in the image?" }, { "from": "gpt", "value": "The answer is Motorbikes" } ], "clip_score": 17.015625, "loss": 3.002452850341797 }, { "id": "identity_187626", "image": "PMC-VQA/images/PMC6539121_ijms-20-02332-f002_486355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any pathological features in this liver tissue section?" }, { "from": "gpt", "value": "The answer is Pathological features identified, if any" } ], "clip_score": 29.140625, "loss": 4.1187286376953125 }, { "id": "identity_277525", "image": "VQA2.0/images/29456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sign in the background relevant to the individuals in the picture at this moment?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 19.0625, "loss": 3.0242130756378174 }, { "id": "identity_294308", "image": "A-OKVQA/images/463665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bird is the least likely to be the first to catch a bug from the ground among the following options? \n(A) rightmost bird\n(B) flying one\n(C) front most\n(D) far left" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.703125, "loss": 4.132999420166016 }, { "id": "identity_321382", "image": "PMC-VQA/images/PMC7408080_brainsci-10-00440-f001_228733.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is visible in the left hemisphere?\nChoices:\n(A) A single lesion in the middle cerebral artery.\n(B) Multiple lesions in the posterior cerebral artery.\n(C) Multiple lesions in the border area between the middle cerebral artery and the posterior cerebral artery.\n(D) High signal intensity lesions on CT." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.234375, "loss": 3.655590057373047 }, { "id": "identity_102277", "image": "TabMWP/images/18876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Roger buys one mixing bowl and two cookie sheets, how much money will he spend?" }, { "from": "gpt", "value": "The answer is Roger will spend $14" } ], "clip_score": 33.40625, "loss": 2.5894134044647217 }, { "id": "identity_60132", "image": "DVQA/images/bar_train_00071175.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of people preferring 'cup' across all categories?" }, { "from": "gpt", "value": "The answer is Approximately 140%" } ], "clip_score": 31.59375, "loss": 2.434112548828125 }, { "id": "identity_294406", "image": "TextVQA/images/57e1ac153cde6348.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of beer is this?" }, { "from": "gpt", "value": "The answer is floris" } ], "clip_score": 36.21875, "loss": 4.159999370574951 }, { "id": "identity_227430", "image": "ChartQA/images/multi_col_100419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total projected population growth percentage from 2022 to 2030 for all age groups combined" }, { "from": "gpt", "value": "The answer is 8.57" } ], "clip_score": 26.734375, "loss": 2.482407808303833 }, { "id": "identity_83062", "image": "UniGeo/images/2365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a person stands still on the escalator and it takes 30 seconds to reach the top, how much distance in meters does the person travel vertically?" }, { "from": "gpt", "value": "The answer is 6 meters" } ], "clip_score": 25.5625, "loss": 2.2360074520111084 }, { "id": "identity_245680", "image": "A-OKVQA/images/431854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the player holding in their right hand and what is its orientation?" }, { "from": "gpt", "value": "The answer is Tennis racquet, facing down" } ], "clip_score": 28.640625, "loss": 3.1721439361572266 }, { "id": "identity_179729", "image": "IconQA/images/33440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of small objects to large objects?" }, { "from": "gpt", "value": "The answer is 13:2" } ], "clip_score": 24.21875, "loss": 2.0635194778442383 }, { "id": "identity_311454", "image": "AI2D/images/1989.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If the sagebrush was removed from this ecosystem, which animal would be left without any food sources?\nChoices:\n(A) Mule deer\n(B) Antelope\n(C) Moose\n(D) Man" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.671875, "loss": 4.125126838684082 }, { "id": "identity_52021", "image": "FigureQA/images/56475.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the smallest percentage in the pie chart?" }, { "from": "gpt", "value": "The answer is Dark Green" } ], "clip_score": 37.28125, "loss": 2.7289836406707764 }, { "id": "identity_166692", "image": "IconQA/images/46393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are all flowers identical?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.46875, "loss": 3.2454752922058105 }, { "id": "identity_36302", "image": "TextVQA/images/743eea30977ef34f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the road name?" }, { "from": "gpt", "value": "The answer is harvard rd." } ], "clip_score": 27.703125, "loss": 1.9390027523040771 }, { "id": "identity_230407", "image": "FigureQA/images/37359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between the largest and smallest segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 15%" } ], "clip_score": 31.828125, "loss": 1.7961795330047607 }, { "id": "identity_133527", "image": "A-OKVQA/images/392144.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the aircraft in the background?" }, { "from": "gpt", "value": "The answer is Red and White" } ], "clip_score": 21.90625, "loss": 2.664546251296997 }, { "id": "identity_199445", "image": "PMC-VQA/images/PMC6889731_Fig1_77628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the index number mentioned in the image?" }, { "from": "gpt", "value": "The answer is 14.0" } ], "clip_score": 19.46875, "loss": 2.3405346870422363 }, { "id": "identity_117202", "image": "VQA2.0/images/542974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the carriage positioned on a paved road?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.203125, "loss": 3.266719341278076 }, { "id": "identity_73480", "image": "ScienceQA/images/18416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the river that forms the western boundary of the highlighted colony?" }, { "from": "gpt", "value": "The answer is Hudson River" } ], "clip_score": 26.03125, "loss": 2.8873231410980225 }, { "id": "identity_207690", "image": "VQA-AS/images/18025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teddy bears can be seen on the floor?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 26.265625, "loss": 2.436143636703491 }, { "id": "identity_47387", "image": "MapQA/images/map_2801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color code representing the number of puppets in the state of Ohio?" }, { "from": "gpt", "value": "The answer is 228282-237615" } ], "clip_score": 34.15625, "loss": 2.563899040222168 }, { "id": "identity_300164", "image": "GeoQA+/images/3144.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,直线PA与⊙O相切于点A,PO交⊙O于点C,连接BC.若∠P=40°,则∠ABC的度数是多少?\n\nChoices:\n(A) 20°\n(B) 25°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.640625, "loss": 2.714245557785034 }, { "id": "identity_85773", "image": "Geometry3K/images/1591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle PRQ in the circle?" }, { "from": "gpt", "value": "The answer is 65 degrees" } ], "clip_score": 36.125, "loss": 2.3847460746765137 }, { "id": "identity_146316", "image": "DocVQA/images/xrcy0227_32.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is the check to be drawn to?" }, { "from": "gpt", "value": "The answer is CHANNING H. LUSHBOUGH" } ], "clip_score": 22.03125, "loss": 1.6812736988067627 }, { "id": "identity_116554", "image": "A-OKVQA/images/423602.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the terrain like where the individuals are walking?" }, { "from": "gpt", "value": "The answer is Snow-covered and rocky" } ], "clip_score": 23.09375, "loss": 2.7159643173217773 }, { "id": "identity_120288", "image": "VQA-AS/images/14407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pillows are on the furniture?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.453125, "loss": 4.89927864074707 }, { "id": "identity_764", "image": "ScienceQA/images/13440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Connecticut?\nChoices:\n(A) Juneau\n(B) Hartford\n(C) Des Moines\n(D) Bridgeport" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.515625, "loss": 4.324105262756348 }, { "id": "identity_260543", "image": "Geometry3K/images/1114.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If \\( F K = 3x - 1 \\), \\( K G = 4y + 3 \\), \\( J K = 6y - 2 \\), and \\( K H = 2x + 3 \\), find \\( x \\) and \\( y \\) so that the quadrilateral is a parallelogram" }, { "from": "gpt", "value": "The answer is \\( x = 4 \\) and \\( y = 2.5 \\)" } ], "clip_score": 30.984375, "loss": 1.7247041463851929 }, { "id": "identity_297066", "image": "PMC-VQA/images/PMC3540650_fig1_177766.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the image, which muscle shows diffuse swelling?\nChoices:\n(A) Left superior oblique muscle\n(B) Right inferior oblique muscle\n(C) Left inferior oblique muscle\n(D) Right superior rectus muscle" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.859375, "loss": 4.100564956665039 }, { "id": "identity_25340", "image": "VQA-AS/images/14332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Will the metal slide be hot if the sun is shining on it?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.25, "loss": 3.485535144805908 }, { "id": "identity_227904", "image": "VizWiz/images/VizWiz_train_00003983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key features of this object that make it suitable for its intended use?" }, { "from": "gpt", "value": "The answer is Spill-proof lid, easy grip, durable material, appropriate size" } ], "clip_score": 26.796875, "loss": 2.6313250064849854 }, { "id": "identity_325400", "image": "AI2D/images/3457.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is this a diagram of?\nChoices:\n(A) bird\n(B) insect\n(C) goat\n(D) none of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.015625, "loss": 4.075406074523926 }, { "id": "identity_230383", "image": "DocVQA/images/phvd0227_42.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What changes were made to the Short-Doyle Act in 1963, and how did these changes impact the services provided by the County General Hospital?" }, { "from": "gpt", "value": "The answer is Reimbursement expanded to include both voluntary and involuntary patients" } ], "clip_score": 27.703125, "loss": 2.772852897644043 }, { "id": "identity_79236", "image": "GeoQA+/images/4831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that BC is the diameter and ∠D = 32°, find the measure of ∠ADC" }, { "from": "gpt", "value": "The answer is 148°" } ], "clip_score": 35.4375, "loss": 1.6615047454833984 }, { "id": "identity_164335", "image": "UniGeo/images/810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In right triangle ABC, with angle BAC equal to 90 degrees, AC equals 3, and AB equals 4, what is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is arctan(3/4) degrees" } ], "clip_score": 33.71875, "loss": 2.143970489501953 }, { "id": "identity_56218", "image": "DocVQA/images/kgcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the correct status of November 28 for the Medical Campus staff?" }, { "from": "gpt", "value": "The answer is It is a normal work day" } ], "clip_score": 21.453125, "loss": 3.365931272506714 }, { "id": "identity_129298", "image": "Super-CLEVR/images/superCLEVR_new_001559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there an object that is both larger than the red shiny motorbike and made of metal?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.34375, "loss": 3.309202194213867 }, { "id": "identity_214849", "image": "TextVQA/images/869967de43a9d3a2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the back of the mascot's jersey?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 21.015625, "loss": 2.476998805999756 }, { "id": "identity_250108", "image": "PMC-VQA/images/PMC9293144_dc24875-fig-0002_343806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main components of the Papanicolaou stain, and how do they differentially stain various cellular structures?" }, { "from": "gpt", "value": "The answer is Hematoxylin, Orange G, Eosin Azure" } ], "clip_score": 26.3125, "loss": 3.464338779449463 }, { "id": "identity_305308", "image": "TQA/images/question_images/ocean_waves_9151.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many crests are shown in the diagram?\nChoices:\n(A) 1\n(B) 3\n(C) 2\n(D) 4" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.546875, "loss": 3.485638380050659 }, { "id": "identity_98363", "image": "IconQA/images/84295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Dave needs to leave for work at 9:30 AM, how many minutes does he have to get ready?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 27.703125, "loss": 1.923957109451294 }, { "id": "identity_191059", "image": "VQA2.0/images/267731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the cow statue doing?" }, { "from": "gpt", "value": "The answer is Balancing a yellow disc on its nose" } ], "clip_score": 29.015625, "loss": 3.328200340270996 }, { "id": "identity_318013", "image": "PMC-VQA/images/PMC4101714_F2_306698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What imaging method was utilized for the biopsy?\nChoices:\n(A) MRI\n(B) CT scan\n(C) Ultrasound\n(D) X-ray" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 3.9065043926239014 }, { "id": "identity_303306", "image": "Super-CLEVR/images/superCLEVR_new_001563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the shiny tandem bike in front of the cyan shiny thing and the big metal motorbike the same size?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 31.203125, "loss": 5.173127174377441 }, { "id": "identity_230100", "image": "TabMWP/images/21054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of pages written per day over the 4 days, and how does this compare to the number of pages written on Friday?" }, { "from": "gpt", "value": "The answer is The average is 75, 4 pages less than Friday" } ], "clip_score": 35.375, "loss": 3.164421796798706 }, { "id": "identity_207235", "image": "PMC-VQA/images/PMC8638553_Fig6_129316.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a scale bar present in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.75, "loss": 3.1182072162628174 }, { "id": "identity_197831", "image": "VQA-AS/images/10186.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches are visible in the image?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.859375, "loss": 2.316127300262451 }, { "id": "identity_233978", "image": "AI2D/images/501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the complete food chain involving the larger fish, the smaller fish, and their prey?" }, { "from": "gpt", "value": "The answer is small aquatic organisms → smaller fish → larger fish" } ], "clip_score": 32.71875, "loss": 2.5153629779815674 }, { "id": "identity_72478", "image": "ScienceQA/images/11337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of nails in the image?" }, { "from": "gpt", "value": "The answer is YOUR CORRECT ANSWER" } ], "clip_score": 20.359375, "loss": 2.630615234375 }, { "id": "identity_145397", "image": "MapQA/images/map_2051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range of rates for the state of Florida?" }, { "from": "gpt", "value": "The answer is 37.1%-40.8%" } ], "clip_score": 32.40625, "loss": 1.8410446643829346 }, { "id": "identity_32692", "image": "PMC-VQA/images/PMC4604556_f2_433091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the visualization showing? \nChoices:\n(A) A dorsal view of An. sinensis \n(B) A ventral view of An. sinensis \n(C) An axial view of An. sinensis \n(D) A lateral view of An. sinensis. " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.765625, "loss": 4.06079626083374 }, { "id": "identity_4367", "image": "Geometry3K/images/1244.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: \\triangle P Q S \\cong \\triangle R Q S. Find y.\nChoices:\n(A) 8\n(B) 12\n(C) 16\n(D) 20" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.15625, "loss": 2.4614768028259277 }, { "id": "identity_138858", "image": "DocVQA/images/rrcw0217_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific symptom is mentioned in relation to the effect of amantadine hydrochloride in Pauline M. Maki, Ph.D.'s research?" }, { "from": "gpt", "value": "The answer is Symptoms of frontal lobe dysfunction in brain injury" } ], "clip_score": 22.53125, "loss": 2.703730583190918 }, { "id": "identity_181012", "image": "TabMWP/images/13515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of keys that open the front door of the house?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 35.34375, "loss": 2.6645450592041016 }, { "id": "identity_30800", "image": "A-OKVQA/images/345702.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What street is this event happening on?\nChoices:\n(A) 3rd\n(B) 200th\n(C) 4th\n(D) north" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.5703125, "loss": 3.6938629150390625 }, { "id": "identity_46032", "image": "FigureQA/images/96892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any overlap between Dodger Blue and Medium Mint data points?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 36.90625, "loss": 2.9530186653137207 }, { "id": "identity_80835", "image": "UniGeo/images/2470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BPC in the given diagram?" }, { "from": "gpt", "value": "The answer is 149°" } ], "clip_score": 32.40625, "loss": 2.568260669708252 }, { "id": "identity_319017", "image": "VizWiz/images/VizWiz_train_00017228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is inside this?" }, { "from": "gpt", "value": "The answer is chili beans" } ], "clip_score": 29.078125, "loss": 2.336770534515381 }, { "id": "identity_44544", "image": "FigureQA/images/67353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum value represented by the color Chocolate on the graph?" }, { "from": "gpt", "value": "The answer is Approximately 40" } ], "clip_score": 25.703125, "loss": 2.7912240028381348 }, { "id": "identity_169799", "image": "IconQA/images/15987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove all the crabs, how many objects remain?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 19.203125, "loss": 2.122243881225586 }, { "id": "identity_64683", "image": "TQA/images/question_images/cycle_rock_6733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What happens to magma after volcanic eruptions according to the diagram?" }, { "from": "gpt", "value": "The answer is Cooling and crystallization" } ], "clip_score": 25.890625, "loss": 1.987175464630127 }, { "id": "identity_784", "image": "ScienceQA/images/5955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Rhode Island?\nChoices:\n(A) Concord\n(B) Providence\n(C) Annapolis\n(D) Montpelier" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.515625, "loss": 4.123041152954102 }, { "id": "identity_67795", "image": "ScienceQA/images/6325.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any bubble that is not fully within the frame of the image?" }, { "from": "gpt", "value": "The answer is Yes, there are bubbles not fully within the frame" } ], "clip_score": 31.109375, "loss": 1.777438759803772 }, { "id": "identity_15504", "image": "VQA-AS/images/8210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this house a mess?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.71875, "loss": 3.4425578117370605 }, { "id": "identity_202306", "image": "PMC-VQA/images/PMC7487555_Fig6_252843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the color scale in the image linear or non-linear?" }, { "from": "gpt", "value": "The answer is Linear" } ], "clip_score": 20.046875, "loss": 3.689237594604492 }, { "id": "identity_5670", "image": "TextVQA/images/3fbdd8ec7e440208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: when is the event?" }, { "from": "gpt", "value": "The answer is 25 february" } ], "clip_score": 10.6328125, "loss": 2.8883285522460938 }, { "id": "identity_160186", "image": "GeoQA+/images/4241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment DE if the radius of the circle with center O is 5 units?" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 32.34375, "loss": 2.1568961143493652 }, { "id": "identity_160583", "image": "GeoQA+/images/1942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the parallelogram ABCD, if the measure of angle A is 110 degrees, what is the sum of the measures of angles A and C?" }, { "from": "gpt", "value": "The answer is 220 degrees" } ], "clip_score": 28.4375, "loss": 1.3575290441513062 }, { "id": "identity_184946", "image": "PMC-VQA/images/PMC4377378_fig2_372592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the pituitary mass in relation to the pituitary gland?" }, { "from": "gpt", "value": "The answer is Larger than the pituitary gland" } ], "clip_score": 29.765625, "loss": 1.6402461528778076 }, { "id": "identity_143382", "image": "DocVQA/images/ghpj0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the institute mentioned in the document?" }, { "from": "gpt", "value": "The answer is THE AMERICAN FROZEN FOOD INSTITUTE" } ], "clip_score": 22.8125, "loss": 1.9287199974060059 }, { "id": "identity_44096", "image": "FigureQA/images/6140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of all the bars on the y-axis?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 23.640625, "loss": 2.288778305053711 }, { "id": "identity_32975", "image": "Super-CLEVR/images/superCLEVR_new_001031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What size is the purple object that is made of the same material as the sedan?" }, { "from": "gpt", "value": "The answer is small" } ], "clip_score": 29.15625, "loss": 3.6950905323028564 }, { "id": "identity_247962", "image": "ChartQA/images/43692574000916.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average ozone concentration of the four countries shown in the bar chart?" }, { "from": "gpt", "value": "The answer is 54.75 ppb" } ], "clip_score": 28.625, "loss": 1.9887205362319946 }, { "id": "identity_219035", "image": "TextVQA/images/442d372e1f977999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the tag hanging from the person on the right's teapot?" }, { "from": "gpt", "value": "The answer is DRINK ME" } ], "clip_score": 13.5390625, "loss": 3.092700242996216 }, { "id": "identity_232630", "image": "FigureQA/images/90032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the Green Yellow line over the entire x-axis range, and how does it compare to the average value of the Orange line over the same range?" }, { "from": "gpt", "value": "The answer is The average value of Green Yellow is less than Orange" } ], "clip_score": 32.53125, "loss": 2.4550821781158447 }, { "id": "identity_76131", "image": "ScienceQA/images/9649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly east of Alabama?" }, { "from": "gpt", "value": "The answer is Georgia" } ], "clip_score": 22.25, "loss": 2.9829235076904297 }, { "id": "identity_64699", "image": "ScienceQA/images/19574.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the image is a decomposer?" }, { "from": "gpt", "value": "The answer is Mushroom and lichen" } ], "clip_score": 22.78125, "loss": 2.7230207920074463 }, { "id": "identity_243413", "image": "ScienceQA/images/10241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the nutrient-rich soil in Catoctin Mountain Park contribute to the biodiversity of tree species in the ecosystem, and what impact does this have on the overall health of the forest?" }, { "from": "gpt", "value": "The answer is Supports biodiversity and overall forest health" } ], "clip_score": 28.921875, "loss": 3.2949459552764893 }, { "id": "identity_120849", "image": "VQA2.0/images/253556.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the water surrounding the boat?" }, { "from": "gpt", "value": "The answer is The water is calm with minor ripples" } ], "clip_score": 22.765625, "loss": 2.6989152431488037 }, { "id": "identity_143893", "image": "DocVQA/images/lmdg0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trips were made on June 14th, 1967?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.140625, "loss": 2.5408284664154053 }, { "id": "identity_69880", "image": "ScienceQA/images/21096.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of habitat does the bird in the image prefer?" }, { "from": "gpt", "value": "The answer is Forests, woodlands, suburban areas" } ], "clip_score": 25.90625, "loss": 2.45558762550354 }, { "id": "identity_7575", "image": "UniGeo/images/1967.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,线段AB=18cm,BC=6cm,D为BC的中点,则线段AD的长为()\nChoices:\n(A) 12cm\n(B) 15cm\n(C) 13cm\n(D) 11cm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.828125, "loss": 2.4933905601501465 }, { "id": "identity_179225", "image": "IconQA/images/18653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the net is cut along the dotted line and assembled into a 3D shape, what type of base will the cylinder have?" }, { "from": "gpt", "value": "The answer is Circular base" } ], "clip_score": 30.921875, "loss": 2.8975932598114014 }, { "id": "identity_279386", "image": "Geometry3K/images/401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that each pair of polygons is similar, what is the value of C D? \n(A) 3.333\n(B) 10/3\n(C) 4\n(D) 10" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.328125, "loss": 2.8754584789276123 }, { "id": "identity_231169", "image": "UniGeo/images/2056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB∥CD,∠A=70°,OC=OE,求∠COE的度数。" }, { "from": "gpt", "value": "The answer is 110°" } ], "clip_score": 32.0625, "loss": 1.6462626457214355 }, { "id": "identity_240023", "image": "AI2D/images/3034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the part of the cell labeled I and describe its primary function in protein synthesis" }, { "from": "gpt", "value": "The answer is Ribosome; translates mRNA into polypeptide chains for protein synthesis" } ], "clip_score": 28.78125, "loss": 1.8742839097976685 }, { "id": "identity_331136", "image": "DocVQA/images/qghv0228_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the person moderating?" }, { "from": "gpt", "value": "The answer is Michele F. Crown, ESQ." } ], "clip_score": 19.8125, "loss": 2.1973469257354736 }, { "id": "identity_119942", "image": "TextVQA/images/e102801748581b7a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the age range recommended for the LEGO set depicted in the image?" }, { "from": "gpt", "value": "The answer is 6-12" } ], "clip_score": 25.984375, "loss": 2.1163156032562256 }, { "id": "identity_107271", "image": "VQA2.0/images/244135.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrians can be seen on the sidewalk?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.78125, "loss": 2.4618606567382812 }, { "id": "identity_219544", "image": "Super-CLEVR/images/superCLEVR_new_000889.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the largest in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 19.828125, "loss": 3.7459568977355957 }, { "id": "identity_879", "image": "TextVQA/images/dff055dff5d8b847.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the title?" }, { "from": "gpt", "value": "The answer is the freewheeler" } ], "clip_score": 31.40625, "loss": 2.0230562686920166 }, { "id": "identity_174524", "image": "TabMWP/images/17719.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the percentage increase in the price of Timothy seed from 2005 to 2006?" }, { "from": "gpt", "value": "The answer is 57.14%" } ], "clip_score": 29.84375, "loss": 1.650007724761963 }, { "id": "identity_129571", "image": "VQA2.0/images/16195.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition at the time the photo was taken?" }, { "from": "gpt", "value": "The answer is Cloudy with light snowfall" } ], "clip_score": 15.5625, "loss": 2.765432357788086 }, { "id": "identity_123585", "image": "VQA2.0/images/370768.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the airplane flying above or below the clouds?" }, { "from": "gpt", "value": "The answer is Below the clouds" } ], "clip_score": 26.53125, "loss": 2.678840160369873 }, { "id": "identity_302245", "image": "CLEVR-Math/images/CLEVR_train_029634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Add 3 large matte cylinders. How many items are there?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 31.984375, "loss": 1.9054325819015503 }, { "id": "identity_151700", "image": "ScienceQA/images/17876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material are the toys made of, as indicated in the image?" }, { "from": "gpt", "value": "The answer is Rubber" } ], "clip_score": 27.796875, "loss": 2.5603370666503906 }, { "id": "identity_65711", "image": "ScienceQA/images/20365.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What design is engraved at the handle of the knife?" }, { "from": "gpt", "value": "The answer is A floral or decorative design" } ], "clip_score": 26.90625, "loss": 3.1678764820098877 }, { "id": "identity_282217", "image": "CLEVR-Math/images/CLEVR_train_001641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the cubes, how many objects remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.453125, "loss": 1.9687137603759766 }, { "id": "identity_56889", "image": "PlotQA/images/13508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between 2012 and 2014, did the Overall level assessment rating improve, decline, or remain the same?" }, { "from": "gpt", "value": "The answer is Improved (from 40 to 50)" } ], "clip_score": 24.5625, "loss": 2.0414352416992188 }, { "id": "identity_212637", "image": "TextVQA/images/72c00d7f4360eef9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In what month and year was this magazine issue published?" }, { "from": "gpt", "value": "The answer is March 1996" } ], "clip_score": 26.625, "loss": 1.686711072921753 }, { "id": "identity_281910", "image": "FigureQA/images/91479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Medium Periwinkle the color with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.59375, "loss": 4.649470329284668 }, { "id": "identity_83290", "image": "Geometry3K/images/616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ADC if AD is the altitude of triangle ABC and x is 5?" }, { "from": "gpt", "value": "The answer is 45 degrees" } ], "clip_score": 34.4375, "loss": 1.7137746810913086 }, { "id": "identity_151586", "image": "ScienceQA/images/2215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of rock is shown in the image?" }, { "from": "gpt", "value": "The answer is Sedimentary rock" } ], "clip_score": 26.640625, "loss": 2.9999849796295166 }, { "id": "identity_16858", "image": "Super-CLEVR/images/superCLEVR_new_001523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many large objects are both behind the large blue chopper and right of the big rubber bicycle?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.09375, "loss": 2.650377035140991 }, { "id": "identity_233129", "image": "TextVQA/images/e936b9847ac95495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total cost of all the phones displayed in the image?" }, { "from": "gpt", "value": "The answer is $1019.90" } ], "clip_score": 25.265625, "loss": 2.070066452026367 }, { "id": "identity_170879", "image": "TabMWP/images/12469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of tickets sold per day from Saturday to Tuesday?" }, { "from": "gpt", "value": "The answer is 4,460.75" } ], "clip_score": 35.21875, "loss": 2.0683672428131104 }, { "id": "identity_13065", "image": "IconQA/images/36909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of diamonds even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.046875, "loss": 3.988722562789917 }, { "id": "identity_17105", "image": "VQA2.0/images/406205.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the road?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 20.75, "loss": 3.164203643798828 }, { "id": "identity_127641", "image": "PMC-VQA/images/PMC5075973_F2_87014.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the lesion in centimeters?" }, { "from": "gpt", "value": "The answer is Approximately 1 cm" } ], "clip_score": 19.09375, "loss": 2.3297910690307617 }, { "id": "identity_171946", "image": "TabMWP/images/8427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of balloons sold between the day with the most sales and the day with the least sales?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 38.40625, "loss": 2.8139216899871826 }, { "id": "identity_72263", "image": "TQA/images/abc_question_images/earth_eclipses_11686.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which image is the Moon in the umbra of the Earth's shadow?" }, { "from": "gpt", "value": "The answer is T" } ], "clip_score": 26.96875, "loss": 3.2490973472595215 }, { "id": "identity_259241", "image": "IconQA/images/57164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many additional spoons are needed so that each plate has one spoon?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 36.8125, "loss": 2.3031327724456787 }, { "id": "identity_74260", "image": "TQA/images/abc_question_images/muscle_fiber_17088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the bundle of muscle fibers?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.625, "loss": 2.8478710651397705 }, { "id": "identity_309335", "image": "VizWiz/images/VizWiz_train_00005040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the expiration date?" }, { "from": "gpt", "value": "The answer is 2014" } ], "clip_score": 28.453125, "loss": 1.8489655256271362 }, { "id": "identity_33109", "image": "A-OKVQA/images/306030.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country headquarters the brand company of the laptop?\nChoices:\n(A) italy\n(B) india\n(C) japan\n(D) china" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.640625, "loss": 4.251698970794678 }, { "id": "identity_199220", "image": "A-OKVQA/images/231601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the animals?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 22.640625, "loss": 3.218256711959839 }, { "id": "identity_3476", "image": "TQA/images/question_images/glaciers_6935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the area of a glacier where the removal of snow and ice is greater than the amount replaced.\nChoices:\n(A) Ablation zone\n(B) Sediments\n(C) Accumulation zone\n(D) Equilibrium line" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.09375, "loss": 3.587151050567627 }, { "id": "identity_168019", "image": "TabMWP/images/22305.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total quantity demanded at the prices of $940 and $1,050 combined?" }, { "from": "gpt", "value": "The answer is 38,400" } ], "clip_score": 30.265625, "loss": 1.895328164100647 }, { "id": "identity_25420", "image": "PMC-VQA/images/PMC6598616_F2_1086.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the arrow indicating in the given image? \nChoices:\n(A) internal table of the frontal bone \n(B) external table of the parietal bone \n(C) external table of the frontal bone \n(D) internal table of the parietal bone " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.078125, "loss": 4.378718376159668 }, { "id": "identity_139125", "image": "DVQA/images/bar_train_00132018.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of people who prefer the object \"care\"?" }, { "from": "gpt", "value": "The answer is Approximately 30%" } ], "clip_score": 27.875, "loss": 2.3684072494506836 }, { "id": "identity_39271", "image": "DVQA/images/bar_train_00010583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy value for the 'cotton' dataset in the 'patrol' category?" }, { "from": "gpt", "value": "The answer is approximately 6" } ], "clip_score": 27.296875, "loss": 3.429649829864502 }, { "id": "identity_308689", "image": "VQA-AS/images/6439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this doing on the left?" }, { "from": "gpt", "value": "The answer is resting" } ], "clip_score": 22.015625, "loss": 3.177082061767578 }, { "id": "identity_293974", "image": "ScienceQA/images/14601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which ocean is being pointed out in the highlighted area?\nChoices:\n(A) the Pacific Ocean\n(B) the Arctic Ocean\n(C) the Southern Ocean\n(D) the Atlantic Ocean" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.65625, "loss": 3.793058395385742 }, { "id": "identity_114512", "image": "VQA2.0/images/354305.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are parked next to the building?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 17.3125, "loss": 4.790530681610107 }, { "id": "identity_78618", "image": "AI2D/images/3701.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which labeled part in the diagram is the final section of the digestive system?" }, { "from": "gpt", "value": "The answer is Anus" } ], "clip_score": 33.71875, "loss": 2.304793119430542 }, { "id": "identity_184819", "image": "Super-CLEVR/images/superCLEVR_new_000378.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle located between the blue bus and the largest airplane?" }, { "from": "gpt", "value": "The answer is Gold" } ], "clip_score": 29.09375, "loss": 4.018918514251709 }, { "id": "identity_28532", "image": "PMC-VQA/images/PMC2614448_f4_32142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the invaginated structure seen in the uppermost spherule? \nChoices:\n(A) Synaptic ribbon \n(B) Cross-section of a rod soma \n(C) HCat process \n(D) None of the above " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.703125, "loss": 4.5812883377075195 }, { "id": "identity_107120", "image": "PMC-VQA/images/PMC8400526_pharmaceutics-13-01127-f005_47049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique appears to have been used in these images?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) staining" } ], "clip_score": 33.0625, "loss": 0.9054995179176331 }, { "id": "identity_260203", "image": "TabMWP/images/8783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Emily need to buy 3 opal pendants, 4 garnet rings, and 5 jade necklaces?" }, { "from": "gpt", "value": "The answer is $9,575.00" } ], "clip_score": 34.125, "loss": 1.852085828781128 }, { "id": "identity_23919", "image": "A-OKVQA/images/6753.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of fruit is in the image?\nChoices:\n(A) watermelon\n(B) banana\n(C) strawberry\n(D) cantaloupe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.015625, "loss": 3.906276226043701 }, { "id": "identity_338579", "image": "FigureQA/images/59484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Light Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.2890625, "loss": 3.9487013816833496 }, { "id": "identity_16874", "image": "ScienceQA/images/6044.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which statement describes the Kaeng Krachan National Park ecosystem?\nChoices:\n(A) It has soil that is rich in nutrients.\n(B) It has only a few types of organisms.\n(C) It has soil that is poor in nutrients." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.796875, "loss": 3.893996000289917 }, { "id": "identity_57906", "image": "DVQA/images/bar_train_00046885.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many labels have a positive value for both 'tea' and 'hill' categories?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.625, "loss": 2.301236867904663 }, { "id": "identity_113192", "image": "PMC-VQA/images/PMC6373287_Fig1_435113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure labeled with the letter 'B'?" }, { "from": "gpt", "value": "The answer is Sphenoid sinus" } ], "clip_score": 30.984375, "loss": 1.7502192258834839 }, { "id": "identity_167582", "image": "IconQA/images/53869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Simon's watch is 15 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 12:45" } ], "clip_score": 26.546875, "loss": 1.5932706594467163 }, { "id": "identity_327812", "image": "TextVQA/images/4ede0cdf42e37c0f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What letter can be seen on the green shirt?" }, { "from": "gpt", "value": "The answer is m" } ], "clip_score": 15.9921875, "loss": 4.032691478729248 }, { "id": "identity_164377", "image": "GeoQA+/images/105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle γ that is adjacent to angle α?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 35.84375, "loss": 2.2341861724853516 }, { "id": "identity_16420", "image": "VQA2.0/images/15097.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this photo from decades past?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.28125, "loss": 3.343935489654541 }, { "id": "identity_215193", "image": "A-OKVQA/images/259099.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is YOUR CORRECT ANSWER" } ], "clip_score": 14.53125, "loss": 2.754028081893921 }, { "id": "identity_211460", "image": "PMC-VQA/images/PMC9157756_F1_298790.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What abnormality can be observed in the image?" }, { "from": "gpt", "value": "The answer is Central hyperintensity" } ], "clip_score": 28.484375, "loss": 4.641763687133789 }, { "id": "identity_182372", "image": "PMC-VQA/images/PMC6350628_fig1_428427.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What colors are used to represent elements in the bottom images?" }, { "from": "gpt", "value": "The answer is Red, Blue, Green" } ], "clip_score": 18.578125, "loss": 2.3325438499450684 }, { "id": "identity_113558", "image": "A-OKVQA/images/17350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the life jacket worn by the person in the image?" }, { "from": "gpt", "value": "The answer is The life jacket is orange" } ], "clip_score": 19.140625, "loss": 2.709883213043213 }, { "id": "identity_67444", "image": "AI2D/images/1848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the food chain diagram is a secondary consumer?" }, { "from": "gpt", "value": "The answer is Snakes" } ], "clip_score": 32.5, "loss": 2.4963672161102295 }, { "id": "identity_14029", "image": "A-OKVQA/images/518511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For which city does this fire truck perform work?\nChoices:\n(A) new york\n(B) kentucky\n(C) arkansas\n(D) new jersey" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.6875, "loss": 4.085608005523682 }, { "id": "identity_91206", "image": "UniGeo/images/1415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If PA and PB are tangents to circle O at points A and B respectively, and angle P is 70 degrees, what is the measure of angle APB?" }, { "from": "gpt", "value": "The answer is 125 degrees" } ], "clip_score": 33.3125, "loss": 1.8716899156570435 }, { "id": "identity_36233", "image": "IconQA/images/50220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Fill in the blank to describe the model. The model has 6 dots divided into 3 equal groups. There are (_) dots in each group." }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.234375, "loss": 1.8862910270690918 }, { "id": "identity_74498", "image": "TQA/images/abc_question_images/seasons_12629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phenomenon is represented by the arrows in the diagram?" }, { "from": "gpt", "value": "The answer is The Earth's orbit around the Sun" } ], "clip_score": 27.421875, "loss": 2.538048505783081 }, { "id": "identity_191825", "image": "A-OKVQA/images/577079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the kite closest to the ground on the left side?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 26.171875, "loss": 3.5684075355529785 }, { "id": "identity_41392", "image": "FigureQA/images/33567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Khaki section larger than the Steel Blue section?" }, { "from": "gpt", "value": "The answer is No, the Khaki section is smaller than the Steel Blue section" } ], "clip_score": 35.03125, "loss": 1.2817821502685547 }, { "id": "identity_331647", "image": "IconQA/images/28293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Nick is taking a walk in the morning. His watch shows a time. What is the time?\nChoices:\n(A) 6:00 P.M.\n(B) 6:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.5625, "loss": 3.668616533279419 }, { "id": "identity_228062", "image": "DocVQA/images/xfpg0227_66.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the document has a total of 100 pages, what percentage of the document has been read if the current page is 47?" }, { "from": "gpt", "value": "The answer is 47%" } ], "clip_score": 20.1875, "loss": 1.1859078407287598 }, { "id": "identity_127404", "image": "TextVQA/images/4a09c56a914cfb7c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the condition of the sky in the background?" }, { "from": "gpt", "value": "The answer is Cloudy" } ], "clip_score": 14.1015625, "loss": 3.0988709926605225 }, { "id": "identity_171259", "image": "IconQA/images/66113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Brad took 10 minutes to eat breakfast, how much time did he have to get dressed and pack a lunch before 8:00 AM?" }, { "from": "gpt", "value": "The answer is 20 minutes" } ], "clip_score": 32.28125, "loss": 1.9681708812713623 }, { "id": "identity_103236", "image": "CLEVR-Math/images/CLEVR_train_023763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are large?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.546875, "loss": 2.608041524887085 }, { "id": "identity_17445", "image": "A-OKVQA/images/64551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of biking event is being held here?\nChoices:\n(A) hetrosexual\n(B) sit in\n(C) bake sale\n(D) gay" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.125, "loss": 3.655677318572998 }, { "id": "identity_33641", "image": "GeoQA+/images/5416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知,PD为⊙O的直径,直线BC切⊙O于点C,BP的延长线与CD的延长线交于点A,∠A=28°,∠B=26°,则∠PDC等于()\nChoices:\n(A) 34°\n(B) 36°\n(C) 38°\n(D) 40°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.609375, "loss": 2.466517925262451 }, { "id": "identity_219033", "image": "TextVQA/images/442d372e1f977999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color theme of the costumes?" }, { "from": "gpt", "value": "The answer is Black and white with a touch of purple" } ], "clip_score": 31.015625, "loss": 2.18609356880188 }, { "id": "identity_254000", "image": "PMC-VQA/images/PMC5362993_Fig2_144559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structures are adjacent to the hilar vessels indicated by the asterisk in the CT scan, and what is their significance in diagnosing lung conditions?" }, { "from": "gpt", "value": "The answer is Bronchi, pulmonary arteries, and veins; significant in diagnosing lung conditions" } ], "clip_score": 28.90625, "loss": 2.3151419162750244 }, { "id": "identity_146447", "image": "DocVQA/images/slmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the report mentioned in the document?" }, { "from": "gpt", "value": "The answer is ANNUAL PROGRESS REPORT --- TO THE SURGEON GENERAL" } ], "clip_score": 23.890625, "loss": 2.0714921951293945 }, { "id": "identity_120929", "image": "PMC-VQA/images/PMC7862146_os12831-fig-0002_359979.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any surgical instruments or implants visible in the image?" }, { "from": "gpt", "value": "The answer is Yes or No, depending on the observation" } ], "clip_score": 26.0, "loss": 3.14650821685791 }, { "id": "identity_95496", "image": "IconQA/images/86204.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tennis rackets have strings that are fully visible?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 29.21875, "loss": 2.414893388748169 }, { "id": "identity_14853", "image": "AI2D/images/2735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What phases are shown above?\nChoices:\n(A) mars phases\n(B) moon phases\n(C) sun phases\n(D) earth phases" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.75, "loss": 3.4551498889923096 }, { "id": "identity_172495", "image": "IconQA/images/14500.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hours and minutes displayed on the watch?" }, { "from": "gpt", "value": "The answer is 3 hours and 0 minutes" } ], "clip_score": 30.5, "loss": 1.3468623161315918 }, { "id": "identity_84648", "image": "UniGeo/images/3043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment AD if AB=CD=10 units?" }, { "from": "gpt", "value": "The answer is 10 units" } ], "clip_score": 27.515625, "loss": 1.7759217023849487 }, { "id": "identity_327889", "image": "IconQA/images/17597.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the clocks indicate Jim did on Saturday night? Which activity did Jim do later?\nChoices:\n(A) watched a movie\n(B) did homework" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.171875, "loss": 3.256054401397705 }, { "id": "identity_29511", "image": "Super-CLEVR/images/superCLEVR_new_001629.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There is a shiny object that is the same size as the metal school bus; what is its shape?" }, { "from": "gpt", "value": "The answer is wagon" } ], "clip_score": 28.734375, "loss": 3.4902830123901367 }, { "id": "identity_71445", "image": "AI2D/images/1197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the mandibles in the ant?" }, { "from": "gpt", "value": "The answer is Mandibles are used for gripping, biting, cutting, carrying objects, and defense" } ], "clip_score": 32.21875, "loss": 2.0476479530334473 }, { "id": "identity_229772", "image": "ScienceQA/images/4985.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which properties do these two objects have in common, and which one is unique to each object?" }, { "from": "gpt", "value": "The answer is Soft, worn as clothing; sports activities, casual occasions" } ], "clip_score": 31.96875, "loss": 4.66396951675415 }, { "id": "identity_231603", "image": "FigureQA/images/573.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Maroon and Firebrick?" }, { "from": "gpt", "value": "The answer is 60%" } ], "clip_score": 29.6875, "loss": 1.9825905561447144 }, { "id": "identity_324847", "image": "TabMWP/images/2258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some individuals compared the quantity of tractors they possess. What is the range of these quantities?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 33.1875, "loss": 3.9326012134552 }, { "id": "identity_94014", "image": "IconQA/images/16016.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of petals on all flowers combined?" }, { "from": "gpt", "value": "The answer is 240" } ], "clip_score": 23.3125, "loss": 1.5529371500015259 }, { "id": "identity_244109", "image": "DocVQA/images/mslw0227_89.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tests were added to the battery in the year 1971, and what are their names?" }, { "from": "gpt", "value": "The answer is Six tests: Memory for Objects, Knox Cubes Slow and Fast, Incomplete Figures, Block Designs, Elimination, Animal House" } ], "clip_score": 24.515625, "loss": 2.9330573081970215 }, { "id": "identity_156933", "image": "AI2D/images/4524.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the botanical term for the central axis of a compound leaf as shown in the image?" }, { "from": "gpt", "value": "The answer is Rachis" } ], "clip_score": 33.09375, "loss": 2.6778485774993896 }, { "id": "identity_121249", "image": "PMC-VQA/images/PMC6022407_Fig8_334272.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brownish spots can be counted in the upper layer of the microphotograph?" }, { "from": "gpt", "value": "The answer is Count the number of brownish spots visible in the image" } ], "clip_score": 27.09375, "loss": 1.9816365242004395 }, { "id": "identity_199909", "image": "A-OKVQA/images/4551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the sky clear or cloudy in the image?" }, { "from": "gpt", "value": "The answer is Cloudy" } ], "clip_score": 13.7421875, "loss": 2.9424331188201904 }, { "id": "identity_79136", "image": "Geometry3K/images/2019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In circle R, if SU = 20, YW = 20, and m∠YXZ = 45°, what is the measure of angle RZT?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 34.5625, "loss": 1.9257456064224243 }, { "id": "identity_186045", "image": "PMC-VQA/images/PMC6053234_pone.0201064.g001_343863.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely cause of the hyperintensities observed in the images?" }, { "from": "gpt", "value": "The answer is Edema or ischemia related to collapsed lobes" } ], "clip_score": 26.625, "loss": 3.6012232303619385 }, { "id": "identity_3720", "image": "IconQA/images/24586.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What time does the clock show?\nChoices:\n(A) 9:00\n(B) 4:00" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.671875, "loss": 3.889587879180908 }, { "id": "identity_207534", "image": "VQA2.0/images/26812.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the tablecloth?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 15.0703125, "loss": 3.194955348968506 }, { "id": "identity_222950", "image": "TextVQA/images/00d4032b2e2efeba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the door of the establishment in the background?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 10.328125, "loss": 3.732795000076294 }, { "id": "identity_197173", "image": "PMC-VQA/images/PMC6938471_Fig1_92791.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of a stroke?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.0625, "loss": 2.7436351776123047 }, { "id": "identity_158321", "image": "AI2D/images/4260.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the E horizon in the image represent?" }, { "from": "gpt", "value": "The answer is Zone of eluviation and leaching" } ], "clip_score": 20.96875, "loss": 1.7308015823364258 }, { "id": "identity_2818", "image": "VQA2.0/images/262049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of vehicle is in this picture?" }, { "from": "gpt", "value": "The answer is train" } ], "clip_score": 22.609375, "loss": 3.4332492351531982 }, { "id": "identity_58505", "image": "PlotQA/images/17627.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are there for Hong Kong?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 24.734375, "loss": 2.760789632797241 }, { "id": "identity_205582", "image": "A-OKVQA/images/325021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of food is visible in the transparent container on the table?" }, { "from": "gpt", "value": "The answer is Coleslaw" } ], "clip_score": 22.296875, "loss": 2.737363815307617 }, { "id": "identity_273748", "image": "ScienceQA/images/468.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which trade option would allow Abdul and Elise to exchange items to get what they want? \nChoices:\n(A) Abdul trading his tomatoes for Elise's carrots.\n(B) Elise trading her broccoli for Abdul's oranges.\n(C) Elise trading her almonds for Abdul's tomatoes.\n(D) Abdul trading his tomatoes for Elise's broccoli." }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.21875, "loss": 2.8638553619384766 }, { "id": "identity_323743", "image": "A-OKVQA/images/205762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is typically found behind barriers like these?\nChoices:\n(A) fish\n(B) prisoners\n(C) people\n(D) dogs" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.5546875, "loss": 4.454649448394775 }, { "id": "identity_244528", "image": "ScienceQA/images/15883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of the state highlighted in the image and name one major landmark located in that capital city" }, { "from": "gpt", "value": "The answer is Saint Paul, Minnesota State Capitol" } ], "clip_score": 25.875, "loss": 3.0049445629119873 }, { "id": "identity_72274", "image": "AI2D/images/2860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled as \"Inner Ear (Cochlea)\"?" }, { "from": "gpt", "value": "The answer is Cochlea" } ], "clip_score": 32.84375, "loss": 2.0705368518829346 }, { "id": "identity_12134", "image": "PlotQA/images/38380.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the difference between the expenditure made on health care in 1996 in Least developed countries and Middle East & North Africa (all income levels) greater than the difference between the expenditure made on health care in 1995 in Least developed countries and Middle East & North Africa (all income levels) ?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 37.875, "loss": 3.146181583404541 }, { "id": "identity_266658", "image": "GeoQA+/images/4693.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, the perimeter of rectangle ABCD is 20cm. AC intersects BD at point O. A perpendicular line EF is drawn through point O to AC, intersecting AD and BC at points E and F respectively. If CE is connected, what is the perimeter of triangle CDE?\nChoices:\n(A) 6\n(B) 8\n(C) 10\n(D) 12" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.375, "loss": 2.688704013824463 }, { "id": "identity_131885", "image": "A-OKVQA/images/493485.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the lead rope attached to the animal?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.9765625, "loss": 3.067664384841919 }, { "id": "identity_175472", "image": "TabMWP/images/19644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can Simon afford to buy the plastic horse and three green marbles with his $1.28?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.359375, "loss": 2.6972992420196533 }, { "id": "identity_333285", "image": "TabMWP/images/16093.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Mrs. Avila hosts an annual art contest for kids, and she keeps a record of the number of entries each year. According to the table, what was the rate of change between one year and the next in 2011 and 2012?" }, { "from": "gpt", "value": "The answer is -13" } ], "clip_score": 39.0, "loss": 3.2283477783203125 }, { "id": "identity_30887", "image": "VQA2.0/images/180341.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the items under the people's arms?" }, { "from": "gpt", "value": "The answer is ski poles" } ], "clip_score": 24.046875, "loss": 2.5747182369232178 }, { "id": "identity_309165", "image": "MapQA/images/map_1579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this state have the lowest value in the USA?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 29.1875, "loss": 2.446733236312866 }, { "id": "identity_299170", "image": "VQA-AS/images/15536.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the individual seated on the bench?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 28.859375, "loss": 3.273221492767334 }, { "id": "identity_86548", "image": "Geometry3K/images/97.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the expressions for the left and right sides of the quadrilateral?" }, { "from": "gpt", "value": "The answer is The sum is 12y + 24" } ], "clip_score": 36.6875, "loss": 2.587127685546875 }, { "id": "identity_6104", "image": "TextVQA/images/8aa036e8bb3f39d8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of the restaurant on the sign?" }, { "from": "gpt", "value": "The answer is friendly's" } ], "clip_score": 30.421875, "loss": 3.3911292552948 }, { "id": "identity_156378", "image": "TQA/images/question_images/tides_2608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phase of the Moon shown in the image?" }, { "from": "gpt", "value": "The answer is Full Moon" } ], "clip_score": 24.125, "loss": 2.772550582885742 }, { "id": "identity_259096", "image": "PMC-VQA/images/PMC5078001_F4_87237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which marker is represented in red in the IHC panel, and how many red markers are visible in the image?" }, { "from": "gpt", "value": "The answer is CEP9, 24" } ], "clip_score": 23.578125, "loss": 4.114124298095703 }, { "id": "identity_289836", "image": "A-OKVQA/images/244231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: On what slender objects are the birds perched?\nChoices:\n(A) branches\n(B) poles\n(C) power lines\n(D) traps" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.53125, "loss": 4.003000259399414 }, { "id": "identity_98581", "image": "CLEVR-Math/images/CLEVR_train_051087.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are small cylinders?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.890625, "loss": 2.5637829303741455 }, { "id": "identity_54720", "image": "DVQA/images/bar_train_00191743.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the algorithm parlor in the dataset thick?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 32.4375, "loss": 2.9125471115112305 }, { "id": "identity_260270", "image": "VQA2.0/images/568840.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of food items are on the trays, and what are they?" }, { "from": "gpt", "value": "The answer is Two types of donuts and coffee" } ], "clip_score": 29.375, "loss": 3.5548810958862305 }, { "id": "identity_104131", "image": "A-OKVQA/images/55575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is depicted in the image?" }, { "from": "gpt", "value": "The answer is A natural history museum" } ], "clip_score": 27.96875, "loss": 3.224891424179077 }, { "id": "identity_27004", "image": "Geometry3K/images/178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, \\triangle A B C is isosceles, \\triangle D C E is equilateral, and \\triangle F C G is isosceles. Find the measure of the \\angle 2 at vertex C.\nChoices:\n(A) 14\n(B) 16\n(C) 17\n(D) 18" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.8125, "loss": 2.31174898147583 }, { "id": "identity_205175", "image": "PMC-VQA/images/PMC7175531_Fig1_156253.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging modality is used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 33.5625, "loss": 2.5505306720733643 }, { "id": "identity_333919", "image": "A-OKVQA/images/470174.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is in the middle of the image?\nChoices:\n(A) statue\n(B) tv\n(C) window\n(D) wood stove" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.015625, "loss": 4.166045188903809 }, { "id": "identity_306402", "image": "AI2D/images/1975.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many animals prey on the rabbit?\nChoices:\n(A) 4\n(B) 2\n(C) 1\n(D) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.28125, "loss": 3.508021831512451 }, { "id": "identity_115065", "image": "VQA2.0/images/292512.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rolls of toilet paper are on the shelf?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.515625, "loss": 2.7513809204101562 }, { "id": "identity_56868", "image": "DocVQA/images/fshk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the first company/institution name listed on the right column?" }, { "from": "gpt", "value": "The answer is Value Line" } ], "clip_score": 19.703125, "loss": 3.966710090637207 }, { "id": "identity_17514", "image": "ChartQA/images/64930712000708.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much times is the longest bar over the shortest bar by value" }, { "from": "gpt", "value": "The answer is 22.33" } ], "clip_score": 10.421875, "loss": 2.2680602073669434 }, { "id": "identity_267288", "image": "VQA-AS/images/5445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the color of the rocking horse?" }, { "from": "gpt", "value": "The answer is purple" } ], "clip_score": 15.5625, "loss": 2.8715133666992188 }, { "id": "identity_21863", "image": "TextVQA/images/d987facdb741de7c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who was born on this day?" }, { "from": "gpt", "value": "The answer is christ the lord" } ], "clip_score": 23.671875, "loss": 2.792786121368408 }, { "id": "identity_170911", "image": "TabMWP/images/37050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more expensive is it to buy 4 kilograms of faux fur scraps compared to 4 kilograms of lace scraps?" }, { "from": "gpt", "value": "The answer is $4" } ], "clip_score": 30.046875, "loss": 2.7939088344573975 }, { "id": "identity_206195", "image": "VQA2.0/images/22447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the pen on the desk?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.109375, "loss": 3.4390532970428467 }, { "id": "identity_115359", "image": "TextVQA/images/5a28beb8f64b7d77.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible vehicles in the background?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 11.640625, "loss": 3.190886974334717 }, { "id": "identity_36410", "image": "UniGeo/images/3413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,点C、D都在⊙O上,若∠ABC=50°,则∠BDC=()\nChoices:\n(A) 50°\n(B) 45°\n(C) 40°\n(D) 30°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.140625, "loss": 2.7141926288604736 }, { "id": "identity_313567", "image": "UniGeo/images/1113.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 四边形ABCD内接于一个圆,E在BC延长线上,∠DCE=70°,则∠BOD等于多少?\n选项:\n(A) 100°\n(B) 110°\n(C) 140°\n(D) 70°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.265625, "loss": 2.4042410850524902 }, { "id": "identity_254596", "image": "A-OKVQA/images/479620.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the person on top of the horse wearing on their head, and what is the color of the horse's mane?" }, { "from": "gpt", "value": "The answer is Helmet, black and white" } ], "clip_score": 27.25, "loss": 3.1179006099700928 }, { "id": "identity_121722", "image": "A-OKVQA/images/343769.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of flooring is the sport being played on?" }, { "from": "gpt", "value": "The answer is Indoor sports flooring" } ], "clip_score": 23.96875, "loss": 2.8617427349090576 }, { "id": "identity_308244", "image": "FigureQA/images/45355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Chartreuse the smallest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.53125, "loss": 4.188727378845215 }, { "id": "identity_101907", "image": "IconQA/images/28872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares wide is the letter R before transformation?" }, { "from": "gpt", "value": "The answer is 3 squares" } ], "clip_score": 31.859375, "loss": 2.6510469913482666 }, { "id": "identity_323470", "image": "A-OKVQA/images/522461.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the individuals sitting in the snow doing?\nChoices:\n(A) waiting\n(B) sledding\n(C) gambling\n(D) skiing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.125, "loss": 4.323390483856201 }, { "id": "identity_238450", "image": "VQA2.0/images/442723.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many carriages does the train have, and what is the color of the building behind the train?" }, { "from": "gpt", "value": "The answer is Two carriages, red" } ], "clip_score": 24.671875, "loss": 4.061524868011475 }, { "id": "identity_39584", "image": "DVQA/images/bar_train_00172718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of saline in bank the smallest among all categories?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.765625, "loss": 3.490391969680786 }, { "id": "identity_307775", "image": "PMC-VQA/images/PMC5035136_f0025_76894.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is being shown in these scatterplots on the right?\nChoices:\n(A) PPI correlation\n(B) Contrast of feature\n(C) MDN connectivity\n(D) Performance measurement" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.78125, "loss": 4.398189544677734 }, { "id": "identity_72331", "image": "AI2D/images/1137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the flower develops into seeds after fertilization?" }, { "from": "gpt", "value": "The answer is Ovules" } ], "clip_score": 29.53125, "loss": 2.4457900524139404 }, { "id": "identity_281442", "image": "A-OKVQA/images/427555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which beverage are these ladies consuming? Is it whiskey, beer, water, or wine?\nChoices:\n(A) whiskey\n(B) beer\n(C) water\n(D) wine" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.640625, "loss": 3.7114269733428955 }, { "id": "identity_183922", "image": "VQA-AS/images/6075.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the cat?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 19.703125, "loss": 3.295555830001831 }, { "id": "identity_301244", "image": "VQA2.0/images/576685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does this animal eat?" }, { "from": "gpt", "value": "The answer is fish" } ], "clip_score": 15.7734375, "loss": 2.4461350440979004 }, { "id": "identity_277579", "image": "TQA/images/question_images/earth_moon_phases_1251.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which moon phase occurs after the new moon and before the first quarter: Waxing crescent, Last Quarter, Waning Gibbous, or Full moon?\nChoices:\n(A) Waxing crescent\n(B) Last Quarter\n(C) Waning Gibbous\n(D) Full moon" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.5625, "loss": 3.5937533378601074 }, { "id": "identity_314675", "image": "Geometry3K/images/203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 2.\nChoices:\n(A) 26\n(B) 39\n(C) 55\n(D) 62" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.09375, "loss": 3.109154224395752 }, { "id": "identity_28232", "image": "IconQA/images/47872.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Tom is making his bed one morning. The clock shows the time. What time is it?\nChoices:\n(A) 7:00 P.M.\n(B) 7:00 A.M." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.90625, "loss": 3.820197343826294 }, { "id": "identity_109046", "image": "A-OKVQA/images/161719.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pet is sitting on the floor?" }, { "from": "gpt", "value": "The answer is Dog" } ], "clip_score": 13.375, "loss": 3.287421703338623 }, { "id": "identity_101379", "image": "CLEVR-Math/images/CLEVR_train_012161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more cubes or cylinders?" }, { "from": "gpt", "value": "The answer is They are equal" } ], "clip_score": 23.203125, "loss": 3.7378554344177246 }, { "id": "identity_288313", "image": "TabMWP/images/19340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was Julia's total income after expenses last fall, considering she made donuts and apple fritters while working at a donut shop?" }, { "from": "gpt", "value": "The answer is 6,335" } ], "clip_score": 37.0625, "loss": 2.6590960025787354 }, { "id": "identity_1943", "image": "Geometry3K/images/1617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Lines l, m, and n are perpendicular bisectors of \\triangle P Q R and meet at T. If T Q = 2 x, P T = 3 y - 1, and T R = 8, find x.\nChoices:\n(A) 3\n(B) 4\n(C) 5\n(D) 6" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.765625, "loss": 2.352679967880249 }, { "id": "identity_163587", "image": "Geometry3K/images/642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the triangle?" }, { "from": "gpt", "value": "The answer is Area = 1/2 * sqrt(z^2 - 361) * 19" } ], "clip_score": 34.15625, "loss": 1.9814046621322632 }, { "id": "identity_11749", "image": "AI2D/images/2121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Imagine that elk disappeared from the ecosystem. Which organism would lose a source of food as a result?\nChoices:\n(A) Grizzly bear\n(B) Migratory grasshopper\n(C) Yellow-bellied marmot\n(D) Strawberry plant" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.984375, "loss": 3.689736843109131 }, { "id": "identity_333539", "image": "A-OKVQA/images/65488.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the person wearing the covering around their neck?\nChoices:\n(A) keeping warm\n(B) covering scar\n(C) vanity\n(D) style" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 14.609375, "loss": 4.539215087890625 }, { "id": "identity_291945", "image": "TabMWP/images/11050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Clara's school provided the number of members in each sports team. What is the average number of members across all the teams?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 33.40625, "loss": 2.305762767791748 }, { "id": "identity_129001", "image": "VQA2.0/images/236217.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the umbrella held by the person on the right?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 16.21875, "loss": 2.3524720668792725 }, { "id": "identity_21729", "image": "TextVQA/images/323cd30dcc54ef38.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what kind of space command is it?" }, { "from": "gpt", "value": "The answer is computer space command" } ], "clip_score": 14.015625, "loss": 3.0310609340667725 }, { "id": "identity_80816", "image": "GeoQA+/images/5702.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the small square in the center has a side length equal to the shorter leg of the right triangles, what is the ratio of the area of the small square to the area of one of the right triangles?" }, { "from": "gpt", "value": "The answer is 1.2" } ], "clip_score": 30.5, "loss": 2.6773433685302734 }, { "id": "identity_253434", "image": "GeoQA+/images/4653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,将一条对边互相平行的纸带进行两次折叠,折痕分别为AB,CD.若CD∥BE,∠1=30°,且AB与CD的长度相等,则∠2的度数是多少?" }, { "from": "gpt", "value": "The answer is 150°" } ], "clip_score": 26.125, "loss": 1.680728793144226 }, { "id": "identity_140330", "image": "PlotQA/images/93454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are compared in this chart?" }, { "from": "gpt", "value": "The answer is 3 countries" } ], "clip_score": 24.609375, "loss": 2.36808180809021 }, { "id": "identity_106769", "image": "A-OKVQA/images/202880.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vegetation is the giraffe facing eating?" }, { "from": "gpt", "value": "The answer is A young tree or sapling" } ], "clip_score": 30.84375, "loss": 3.187635898590088 }, { "id": "identity_70303", "image": "ScienceQA/images/15059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of unique items wanted by Ernest and Zane?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 21.453125, "loss": 2.0912320613861084 }, { "id": "identity_271785", "image": "FigureQA/images/76405.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes Dark Gold as the smoothest? \n(A) no \n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.59375, "loss": 4.45334529876709 }, { "id": "identity_322801", "image": "PlotQA/images/142157.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the difference between the net official aid flow in 2008 in these two countries greater than the difference between the net official aid flow in 2006 in these two countries?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.875, "loss": 2.845499038696289 }, { "id": "identity_285813", "image": "ChartQA/images/7558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final result obtained by adding all green bars with a value below 40, and then multiplying the result by the median blue bar?" }, { "from": "gpt", "value": "The answer is 4032" } ], "clip_score": 18.203125, "loss": 2.6941823959350586 }, { "id": "identity_313226", "image": "DocVQA/images/yrcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the code for the area?" }, { "from": "gpt", "value": "The answer is 215" } ], "clip_score": 9.234375, "loss": 2.384920835494995 }, { "id": "identity_203564", "image": "PMC-VQA/images/PMC7882111_acm213151-fig-0006_367662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum percentage of the prescribed dose (2500 cGy) delivered in this image?" }, { "from": "gpt", "value": "The answer is 110%" } ], "clip_score": 22.3125, "loss": 1.728462815284729 }, { "id": "identity_37109", "image": "IconQA/images/91403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.609375, "loss": 3.9445266723632812 }, { "id": "identity_222286", "image": "VizWiz/images/VizWiz_train_00018087.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any furniture visible in the image?" }, { "from": "gpt", "value": "The answer is Yes, a wooden nightstand" } ], "clip_score": 15.9765625, "loss": 3.2776103019714355 }, { "id": "identity_46027", "image": "ChartQA/images/10733.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of the data presented in the image?" }, { "from": "gpt", "value": "The answer is 2015 Pew Research Center survey. Q4" } ], "clip_score": 29.875, "loss": 1.6247034072875977 }, { "id": "identity_271020", "image": "GeoQA+/images/3288.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the right triangle ABC, where angle B is 90 degrees, AB is 3, BC is 4, and point D lies on BC, what is the minimum value of DE in all parallelograms ADCE with AC as a diagonal? \nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 3.75" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.0625, "loss": 2.568065643310547 }, { "id": "identity_14867", "image": "PMC-VQA/images/PMC2917065_fig01_70861.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of images is shown in the figure? \nChoices:\n(A) CT Scans \n(B) MRI \n(C) X-Rays \n(D) Ultrasound " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.421875, "loss": 4.07914400100708 }, { "id": "identity_248692", "image": "PMC-VQA/images/PMC7271529_Fig5_184148.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific brain region is highlighted in the red box, and what is its primary function related to olfaction?" }, { "from": "gpt", "value": "The answer is Olfactory bulb, processes smell information" } ], "clip_score": 33.28125, "loss": 3.1263303756713867 }, { "id": "identity_78034", "image": "AI2D/images/15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many levels of consumers are depicted in this marine food web?" }, { "from": "gpt", "value": "The answer is Five levels of consumers" } ], "clip_score": 31.75, "loss": 2.926450490951538 }, { "id": "identity_5675", "image": "VQA-AS/images/16013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there anything on the table?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.46875, "loss": 3.634685516357422 }, { "id": "identity_89896", "image": "UniGeo/images/1715.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line l is rotated 40° counterclockwise to intersect line AB at point G, what is the measure of ∠EGB?" }, { "from": "gpt", "value": "The answer is 140°" } ], "clip_score": 36.53125, "loss": 1.6410727500915527 }, { "id": "identity_296341", "image": "DocVQA/images/hrpf0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Pastor at Third Baptist Church?" }, { "from": "gpt", "value": "The answer is The Reverend C. Oscar Johnson, D.D., LL.D." } ], "clip_score": 19.390625, "loss": 1.8277580738067627 }, { "id": "identity_212172", "image": "A-OKVQA/images/236747.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of protective gear is the dog wearing?" }, { "from": "gpt", "value": "The answer is Protective eyewear" } ], "clip_score": 30.28125, "loss": 2.79058837890625 }, { "id": "identity_306882", "image": "VQA-AS/images/13548.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the person's shirt?" }, { "from": "gpt", "value": "The answer is black" } ], "clip_score": 15.265625, "loss": 3.4141125679016113 }, { "id": "identity_238096", "image": "TextVQA/images/588c3eb6601cdb4e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the colors of the shirt and the object the person is sitting in, and what is written on the shirt?" }, { "from": "gpt", "value": "The answer is Orange shirt, yellow object, OSU on shirt" } ], "clip_score": 19.078125, "loss": 2.0412278175354004 }, { "id": "identity_151448", "image": "ScienceQA/images/20276.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly east of the state marked with an orange dot?" }, { "from": "gpt", "value": "The answer is Utah" } ], "clip_score": 32.875, "loss": 2.786888360977173 }, { "id": "identity_164307", "image": "GeoQA+/images/4129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the circle with radius 5?" }, { "from": "gpt", "value": "The answer is 25π" } ], "clip_score": 33.625, "loss": 1.5962119102478027 }, { "id": "identity_33596", "image": "ScienceQA/images/1202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of Ohio?\nChoices:\n(A) Cincinnati\n(B) Lincoln\n(C) Cleveland\n(D) Columbus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.3125, "loss": 3.8302693367004395 }, { "id": "identity_204517", "image": "A-OKVQA/images/57641.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the salon on the left side of the street?" }, { "from": "gpt", "value": "The answer is Friseursalon Figaro" } ], "clip_score": 25.9375, "loss": 3.476978063583374 }, { "id": "identity_94024", "image": "IconQA/images/33424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cubes tall is the paintbrush including the bristles?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.609375, "loss": 2.2702796459198 }, { "id": "identity_91768", "image": "IconQA/images/99027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Tommy needs to finish his letter by 8:00, how many minutes does he have left to write?" }, { "from": "gpt", "value": "The answer is 30 minutes" } ], "clip_score": 25.421875, "loss": 2.0990500450134277 }, { "id": "identity_206693", "image": "A-OKVQA/images/294176.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many clocks are there in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.3125, "loss": 2.233541250228882 }, { "id": "identity_222986", "image": "TextVQA/images/18a85804a835db25.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the distance to the left field foul pole, as indicated on the wall?" }, { "from": "gpt", "value": "The answer is 334 feet" } ], "clip_score": 25.6875, "loss": 2.06931471824646 }, { "id": "identity_161678", "image": "GeoQA+/images/650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 31.78125, "loss": 1.6479902267456055 }, { "id": "identity_67555", "image": "ScienceQA/images/20242.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism in the food web is a decomposer?" }, { "from": "gpt", "value": "The answer is Bolete fungus and Parasol fungus" } ], "clip_score": 24.0, "loss": 2.424931049346924 }, { "id": "identity_156412", "image": "ScienceQA/images/14296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in volume between Saturn and Uranus?" }, { "from": "gpt", "value": "The answer is 758,800" } ], "clip_score": 22.59375, "loss": 2.452052593231201 }, { "id": "identity_71990", "image": "ScienceQA/images/13100.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which building is directly south of the Police Department?" }, { "from": "gpt", "value": "The answer is Theater" } ], "clip_score": 29.0625, "loss": 3.527634382247925 }, { "id": "identity_230227", "image": "ScienceQA/images/8297.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average specific humidity level within the outlined area if the specific humidity levels at four evenly spaced points within the area are 4, 6, 8, and 4 grams of water vapor per kilogram of air?" }, { "from": "gpt", "value": "The answer is 5.5 grams of water vapor per kilogram of air" } ], "clip_score": 18.90625, "loss": 0.8462899923324585 }, { "id": "identity_163061", "image": "GeoQA+/images/4993.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diagonal AC in the given image?" }, { "from": "gpt", "value": "The answer is √136" } ], "clip_score": 30.09375, "loss": 2.8974390029907227 }, { "id": "identity_306645", "image": "TabMWP/images/1954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more tires were made in one month compared to another month?" }, { "from": "gpt", "value": "The answer is 4,234" } ], "clip_score": 33.28125, "loss": 2.454665422439575 }, { "id": "identity_77803", "image": "AI2D/images/1161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What cellular process occurs at the structure labeled \"ribosomes\"?" }, { "from": "gpt", "value": "The answer is Protein synthesis" } ], "clip_score": 25.21875, "loss": 1.9816724061965942 }, { "id": "identity_170652", "image": "TabMWP/images/14032.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the archer hit the least number of bull's-eyes?" }, { "from": "gpt", "value": "The answer is Friday" } ], "clip_score": 33.9375, "loss": 2.9816737174987793 }, { "id": "identity_45943", "image": "FigureQA/images/85851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value for Dark Gold at x-axis label 30?" }, { "from": "gpt", "value": "The answer is Approximately 45" } ], "clip_score": 26.96875, "loss": 2.6058223247528076 }, { "id": "identity_219992", "image": "TextVQA/images/99371dbae815a74b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many months to pay is offered for the General Electric Range?" }, { "from": "gpt", "value": "The answer is 36 months" } ], "clip_score": 22.703125, "loss": 2.1876320838928223 }, { "id": "identity_194252", "image": "A-OKVQA/images/416981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kitchen appliance is placed to the right of the stove?" }, { "from": "gpt", "value": "The answer is A microwave" } ], "clip_score": 24.765625, "loss": 3.230674982070923 }, { "id": "identity_248280", "image": "TabMWP/images/33531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of sheep owned by the farmers, and how does it compare to the median number of sheep owned?" }, { "from": "gpt", "value": "The answer is Average is 91.625, median is 91, difference is 0.625" } ], "clip_score": 37.78125, "loss": 1.995313048362732 }, { "id": "identity_83113", "image": "Geometry3K/images/646.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of the angle labeled (8y - 12)°?" }, { "from": "gpt", "value": "The answer is 112°" } ], "clip_score": 33.4375, "loss": 1.7731819152832031 }, { "id": "identity_227475", "image": "UniGeo/images/2167.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC中,D、E分别是BC、AC的中点,BF平分∠ABC,交DE于点F,若BC=6,且AC=8,则EF的长是()" }, { "from": "gpt", "value": "The answer is 2.5" } ], "clip_score": 30.203125, "loss": 2.0031542778015137 }, { "id": "identity_19657", "image": "A-OKVQA/images/80543.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is the man in black crouched?\nChoices:\n(A) to dive\n(B) to jump\n(C) gain speed\n(D) to sit" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.859375, "loss": 4.053434371948242 }, { "id": "identity_302767", "image": "TextVQA/images/13a5eb5cd5b417e3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year is written on this silver coin at the bottom?" }, { "from": "gpt", "value": "The answer is 1997" } ], "clip_score": 22.5625, "loss": 1.8026490211486816 }, { "id": "identity_333975", "image": "DocVQA/images/lxjh0227_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mentioned date?" }, { "from": "gpt", "value": "The answer is November 13, 1969" } ], "clip_score": 21.53125, "loss": 1.8789697885513306 }, { "id": "identity_264169", "image": "TQA/images/question_images/aquifers_6522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From which source does the water originate from the major irrigation well? \n(A) Fresh ground aquifer\n(B) Normal Water Table\n(C) Low Water Table\n(D) Normal Interface" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.078125, "loss": 3.812220811843872 }, { "id": "identity_115950", "image": "Super-CLEVR/images/superCLEVR_new_001519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the large object located to the left of the metallic chopper?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 20.671875, "loss": 5.407408237457275 }, { "id": "identity_241284", "image": "Super-CLEVR/images/superCLEVR_new_001223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare the sizes of the cyan truck, green fighter, and the green motorcycle. Are the cyan truck and green motorcycle larger than the green fighter combined?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 29.984375, "loss": 4.402085304260254 }, { "id": "identity_258684", "image": "IconQA/images/41472.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of dots on all the dice used to measure the line?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 33.0, "loss": 1.910410761833191 }, { "id": "identity_177708", "image": "IconQA/images/91648.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many faces does this cube have?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.546875, "loss": 2.030644655227661 }, { "id": "identity_317686", "image": "UniGeo/images/1415.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,PA,PB分别与⊙O相切于A,B两点,点C是劣弧AB上一动点(不与A,B重合),∠P=70°,则∠C等于多少?\n选项:\n(A) 110°\n(B) 115°\n(C) 120°\n(D) 125°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.046875, "loss": 2.3786888122558594 }, { "id": "identity_249944", "image": "VQA2.0/images/282843.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential hazards and challenges for a beginner surfer in this location?" }, { "from": "gpt", "value": "The answer is Fast-moving water, crowded, concrete surroundings, advanced skills required" } ], "clip_score": 30.5625, "loss": 3.5172717571258545 }, { "id": "identity_87869", "image": "GeoQA+/images/592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, if angle CAB is 45 degrees and angle CBA is 75 degrees, what is the measure of angle ACM?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 35.65625, "loss": 1.7554070949554443 }, { "id": "identity_252041", "image": "PlotQA/images/133987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year was the total percentage of women justifying abuse for all reasons combined the highest?" }, { "from": "gpt", "value": "The answer is 2005" } ], "clip_score": 35.4375, "loss": 1.8071836233139038 }, { "id": "identity_88805", "image": "GeoQA+/images/4258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠A = 35° and chords AB and CD are perpendicular to each other at point P, what is the measure of ∠CPD?" }, { "from": "gpt", "value": "The answer is 145°" } ], "clip_score": 38.625, "loss": 1.3173861503601074 }, { "id": "identity_296939", "image": "GeoQA+/images/523.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, D, E, F are the midpoints of sides BC, AC, AB respectively. AH is perpendicular to BC at H, FD = 16. What is the length of HE?\nChoices:\n(A) 32\n(B) 16\n(C) 8\n(D) 10" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.90625, "loss": 2.5449509620666504 }, { "id": "identity_240027", "image": "A-OKVQA/images/79975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the boarder loses their balance and falls, which protective gear will safeguard their head, and what other protective gear is visible in the image that could protect other parts of their body?" }, { "from": "gpt", "value": "The answer is Helmet, knee pads, and wrist guards" } ], "clip_score": 20.125, "loss": 2.0607454776763916 }, { "id": "identity_11845", "image": "DocVQA/images/zthg0023_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which tobacco outlet's letterhead is this?" }, { "from": "gpt", "value": "The answer is Fast Lane Discount Tobacco Outlet, Inc." } ], "clip_score": 23.71875, "loss": 0.7791503071784973 }, { "id": "identity_221381", "image": "TextVQA/images/ee6ac652c1c9b888.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of products are displayed on the shelves?" }, { "from": "gpt", "value": "The answer is An integer number representing the count of different product types" } ], "clip_score": 25.21875, "loss": 3.3638436794281006 }, { "id": "identity_130689", "image": "VizWiz/images/VizWiz_train_00001865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of verification is being requested in the image?" }, { "from": "gpt", "value": "The answer is CAPTCHA verification" } ], "clip_score": 29.828125, "loss": 1.8437145948410034 }, { "id": "identity_116927", "image": "TextVQA/images/baf7aaefe7579ff1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of film sizes does the Omega B-22 handle?" }, { "from": "gpt", "value": "The answer is Up to 2¼ x 2¼ inches" } ], "clip_score": 16.921875, "loss": 2.686872959136963 }, { "id": "identity_1594", "image": "AI2D/images/3510.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of label T in the above diagram?\nChoices:\n(A) Nucleus\nNucleus\n(B) Cytoplasm\n(C) Mitochondrion\n(D) Chloroplast" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.4375, "loss": 4.131322383880615 }, { "id": "identity_17200", "image": "DocVQA/images/lfcc0081_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Name the compound with 1.23 Mean?" }, { "from": "gpt", "value": "The answer is Organophosphorus" } ], "clip_score": 16.40625, "loss": 2.512058734893799 }, { "id": "identity_162075", "image": "GeoQA+/images/3614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在⊙O中,若∠BOC=50°,则∠CAB的度数为多少?" }, { "from": "gpt", "value": "The answer is 25°" } ], "clip_score": 31.890625, "loss": 2.0674595832824707 }, { "id": "identity_52197", "image": "PlotQA/images/88688.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage increase in imports for Fiji from 2000 to 2006?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 21.90625, "loss": 1.8207393884658813 }, { "id": "identity_241874", "image": "ScienceQA/images/12969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Trace the flow of matter from the persimmon tree to the bolete fungus, listing all organisms involved in the process" }, { "from": "gpt", "value": "The answer is persimmon tree, black bear, gray fox, bolete fungus" } ], "clip_score": 26.78125, "loss": 1.4746553897857666 }, { "id": "identity_197099", "image": "PMC-VQA/images/PMC7106592_Fig1_143146.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any abnormalities outside the red square in the MRI scan?" }, { "from": "gpt", "value": "The answer is No abnormalities detected outside the red square" } ], "clip_score": 32.625, "loss": 2.6999943256378174 }, { "id": "identity_147800", "image": "DVQA/images/bar_train_00161513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the object labeled \"bank\"?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.921875, "loss": 2.7852840423583984 }, { "id": "identity_155456", "image": "TQA/images/question_images/faults_1711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which fault type is the hanging wall positioned above the footwall?" }, { "from": "gpt", "value": "The answer is Reverse fault" } ], "clip_score": 28.4375, "loss": 2.547334671020508 }, { "id": "identity_203307", "image": "TextVQA/images/30381ee59ff50f1a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the actor mentioned on the poster for his role in the movie?" }, { "from": "gpt", "value": "The answer is Bela Lugosi" } ], "clip_score": 20.796875, "loss": 1.8983917236328125 }, { "id": "identity_330657", "image": "TabMWP/images/11050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Clara's school reported the number of members for each sports team. What is the mean of these numbers?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 31.546875, "loss": 2.3454768657684326 }, { "id": "identity_222627", "image": "TextVQA/images/cd27c26641aad71f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beer is in the image?" }, { "from": "gpt", "value": "The answer is Honker's Ale" } ], "clip_score": 28.203125, "loss": 3.354048728942871 }, { "id": "identity_5301", "image": "TabMWP/images/16575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The Oakland Soup Company recorded the number of rotten tomatoes in each barrel it received. What is the smallest number of rotten tomatoes?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 14.7890625, "loss": 2.664865493774414 }, { "id": "identity_315766", "image": "PlotQA/images/105112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between two consecutive major ticks on this axis?" }, { "from": "gpt", "value": "The answer is 5.0" } ], "clip_score": 23.875, "loss": 2.4968087673187256 }, { "id": "identity_184614", "image": "VQA2.0/images/370922.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the building on the right?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 13.5625, "loss": 4.1761369705200195 }, { "id": "identity_222826", "image": "PMC-VQA/images/PMC4330237_Fig15_358901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teeth are visible in the upper jaw of the image?" }, { "from": "gpt", "value": "The answer is Sixteen" } ], "clip_score": 31.125, "loss": 4.443240642547607 }, { "id": "identity_158175", "image": "AI2D/images/172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which season is represented on the right side of the diagram in the Southern Hemisphere?" }, { "from": "gpt", "value": "The answer is Summer" } ], "clip_score": 32.21875, "loss": 3.2359917163848877 }, { "id": "identity_87032", "image": "UniGeo/images/3088.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that angle AOE is 24 degrees, what is the measure of angle AOC?" }, { "from": "gpt", "value": "The answer is 48 degrees" } ], "clip_score": 30.984375, "loss": 1.9504306316375732 }, { "id": "identity_280803", "image": "FigureQA/images/95056.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Do Orange Red and Dark Turquoise intersect?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.890625, "loss": 3.493393659591675 }, { "id": "identity_25743", "image": "PMC-VQA/images/PMC3868516_pntd-0002602-g008_252981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What technique was used to capture these images? \nChoices:\n(A) X-Ray imaging \n(B) MRI scanning \n(C) Polarization microscopy \n(D) Confocal microscopy " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.609375, "loss": 3.1845219135284424 }, { "id": "identity_302759", "image": "GeoQA+/images/1657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,某轮船由东向西航行,在A处测得灯塔M在它的北偏西75°方向上,继续航行8海里到达B处,此时测得灯塔M在它的北偏西60°方向上,则BM是多少?\n\n选择:\n(A) 8海里\n(B) 4海里\n(C) 4√{3}海里\n(D) 4√{2}海里" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.359375, "loss": 2.2042899131774902 }, { "id": "identity_257499", "image": "A-OKVQA/images/562911.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the type of vehicle in the foreground and describe its primary use in three different industries" }, { "from": "gpt", "value": "The answer is Medical transport, news coverage, and crop dusting" } ], "clip_score": 22.125, "loss": 3.091609477996826 }, { "id": "identity_245899", "image": "DocVQA/images/mmlk0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total hours and minutes are scheduled for the conference on March 12, 1976, based on the provided schedule?" }, { "from": "gpt", "value": "The answer is 2 hours and 10 minutes" } ], "clip_score": 25.875, "loss": 1.9294114112854004 }, { "id": "identity_119126", "image": "A-OKVQA/images/39928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is predominantly visible on the storefront signs?" }, { "from": "gpt", "value": "The answer is German" } ], "clip_score": 27.0625, "loss": 3.087627410888672 }, { "id": "identity_241069", "image": "DocVQA/images/ysnf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the deadline for submitting suggestions for the AHA Annual Meeting, and how many months in advance of the meeting is this deadline?" }, { "from": "gpt", "value": "The answer is June 24th, 1974, and 17 months" } ], "clip_score": 25.90625, "loss": 2.230870008468628 }, { "id": "identity_198773", "image": "A-OKVQA/images/233112.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the towel hanging on the left side of the image?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 20.0, "loss": 3.38226318359375 }, { "id": "identity_170131", "image": "TabMWP/images/23459.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Marie wants to buy one of each book, how much more money does she need in addition to her $5.50?" }, { "from": "gpt", "value": "The answer is $14.77" } ], "clip_score": 15.671875, "loss": 2.112745523452759 }, { "id": "identity_94368", "image": "TabMWP/images/12216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of fire hydrants on these streets?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 41.3125, "loss": 3.196478843688965 }, { "id": "identity_278531", "image": "Geometry3K/images/1377.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nOptions:\n(A) 5\n(B) 7.5\n(C) 10\n(D) 12.5" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.546875, "loss": 2.6586995124816895 }, { "id": "identity_193089", "image": "PMC-VQA/images/PMC7914843_molecules-26-00847-f002_377457.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any other structures in the image besides 'RT' and 'B'?" }, { "from": "gpt", "value": "The answer is Likely glomeruli, interstitium, or collecting ducts" } ], "clip_score": 27.65625, "loss": 3.3703103065490723 }, { "id": "identity_334094", "image": "VQA2.0/images/258676.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this person doing?" }, { "from": "gpt", "value": "The answer is playing baseball" } ], "clip_score": 24.265625, "loss": 2.7808778285980225 }, { "id": "identity_294892", "image": "TextVQA/images/37b20486199a4dc4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the name of this team?" }, { "from": "gpt", "value": "The answer is free gaza" } ], "clip_score": 16.609375, "loss": 5.377803802490234 }, { "id": "identity_106496", "image": "PMC-VQA/images/PMC8929746_f2_225133.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level used in this gastric tissue image?" }, { "from": "gpt", "value": "The answer is High magnification (200x to 400x)" } ], "clip_score": 27.375, "loss": 1.5280898809432983 }, { "id": "identity_52281", "image": "MapQA/images/map_498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with jurors in the range of 3366-4317?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.265625, "loss": 2.848493814468384 }, { "id": "identity_112822", "image": "VQA-RAD/images/synpic25534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the window width and level setting used in this CT image?" }, { "from": "gpt", "value": "The answer is Window width: 229 HU, Level: 97 HU" } ], "clip_score": 21.734375, "loss": 1.93874990940094 }, { "id": "identity_95425", "image": "IconQA/images/26707.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the ruler is divided into millimeters, how many millimeters long is the ruler?" }, { "from": "gpt", "value": "The answer is 100 millimeters" } ], "clip_score": 31.03125, "loss": 1.2887465953826904 }, { "id": "identity_86875", "image": "GeoQA+/images/1824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is perpendicular to CD and BE is the diameter of the circle with center O, if the length of chord AC is 3, what is the length of chord BD?" }, { "from": "gpt", "value": "The answer is BD = 3√3/2" } ], "clip_score": 34.34375, "loss": 1.5655133724212646 }, { "id": "identity_258406", "image": "IconQA/images/98804.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sides do all the shapes have in total?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 29.140625, "loss": 1.899149775505066 }, { "id": "identity_97002", "image": "TabMWP/images/6473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode score of the math quiz?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 35.0, "loss": 1.9105349779129028 }, { "id": "identity_302919", "image": "UniGeo/images/2470.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given the image, it is known that the diameter AB of circle O and the chord AC form an angle of 31°. The tangent line PC at point C intersects the extension of AB at point P. What is the measure of ∠P?\n\nChoices:\n(A) 24°\n(B) 25°\n(C) 28°\n(D) 30°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 38.28125, "loss": 2.2607243061065674 }, { "id": "identity_40139", "image": "DVQA/images/bar_train_00181442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item 'boat' across all stores?" }, { "from": "gpt", "value": "The answer is 30 units" } ], "clip_score": 33.21875, "loss": 2.2937912940979004 }, { "id": "identity_33975", "image": "A-OKVQA/images/468956.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What skill does the child hone here?\nChoices:\n(A) tooth brushing\n(B) singing\n(C) reading\n(D) sleeping" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.484375, "loss": 4.243390083312988 }, { "id": "identity_191672", "image": "VQA2.0/images/237511.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bicycle parked next to the bench?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 24.25, "loss": 2.9173061847686768 }, { "id": "identity_5838", "image": "ScienceQA/images/4160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific humidity level was measured within the outlined area shown?\nChoices:\n(A) 15 grams of water vapor per kilogram of air\n(B) 11 grams of water vapor per kilogram of air\n(C) 5 grams of water vapor per kilogram of air" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 16.296875, "loss": 3.986164093017578 }, { "id": "identity_215887", "image": "A-OKVQA/images/514191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.890625, "loss": 4.53424596786499 }, { "id": "identity_233108", "image": "AI2D/images/3865.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the layer that is directly beneath the cutin layer and above the palisade cells in the aerial parts of plants?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.59375, "loss": 3.947949171066284 }, { "id": "identity_70283", "image": "ScienceQA/images/243.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the peninsula located on the western part of the highlighted country?" }, { "from": "gpt", "value": "The answer is Tiburon Peninsula" } ], "clip_score": 22.03125, "loss": 2.184511661529541 }, { "id": "identity_332236", "image": "PMC-VQA/images/PMC4478850_F6_399424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tissue indicated by the solid arrow in this image?\nChoices:\n(A) Stele\n(B) Cortex tissue\n(C) Epidermis\n(D) Phloem tissue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.90625, "loss": 4.696677207946777 }, { "id": "identity_212838", "image": "VQA2.0/images/490400.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the man doing in the living room?" }, { "from": "gpt", "value": "The answer is Working on a laptop" } ], "clip_score": 21.03125, "loss": 3.111327886581421 }, { "id": "identity_302552", "image": "DVQA/images/bar_train_00109553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the steelblue color represent?" }, { "from": "gpt", "value": "The answer is decade" } ], "clip_score": 24.234375, "loss": 3.403092622756958 }, { "id": "identity_153709", "image": "AI2D/images/2596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined gravitational effect called in the diagram?" }, { "from": "gpt", "value": "The answer is Pull of Moon and Sun" } ], "clip_score": 33.625, "loss": 1.9672136306762695 }, { "id": "identity_38300", "image": "PMC-VQA/images/PMC5147411_F3_103813.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which brain region is shown in the scatterplots? \nChoices:\n(A) Left post-central/superior parietal region, \n(B) Right post-central/superior parietal region, \n(C) Anterior cingulate cortex, \n(D) Occipital lobe " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.828125, "loss": 4.200440883636475 }, { "id": "identity_207596", "image": "A-OKVQA/images/526801.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many coffee cups can be seen on the desk?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 25.328125, "loss": 4.859748363494873 }, { "id": "identity_271440", "image": "A-OKVQA/images/359686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: During this game, which position is occupied by player number 22?\nChoices:\n(A) left field\n(B) hitter\n(C) pitcher\n(D) short stop" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.703125, "loss": 4.288189888000488 }, { "id": "identity_247804", "image": "IconQA/images/8769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the rectangle and the length of the diagonal?" }, { "from": "gpt", "value": "The answer is Area is 28 square meters, diagonal is approximately 8.06 meters" } ], "clip_score": 33.0, "loss": 2.3857126235961914 }, { "id": "identity_63481", "image": "ScienceQA/images/3269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many magnets are shown in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 20.609375, "loss": 2.307739496231079 }, { "id": "identity_1942", "image": "ScienceQA/images/8694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Kirk's experiment can best answer.\nChoices:\n(A) Does the type of flour used in the muffins affect the number of muffins that turn brown after 30 minutes in the oven?\n(B) Do muffins made with white flour have larger volumes than muffins made with whole wheat flour?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.8125, "loss": 4.082967758178711 }, { "id": "identity_301167", "image": "VQA2.0/images/333634.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this person holding a bat?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 18.171875, "loss": 3.6041436195373535 }, { "id": "identity_5415", "image": "Geometry3K/images/265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diagonals of rhombus F G H J intersect at K. If m \\angle F J H = 82, find m \\angle K H J.\nChoices:\n(A) 15\n(B) 33\n(C) 49\n(D) 82" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.0625, "loss": 2.329871416091919 }, { "id": "identity_307006", "image": "FigureQA/images/71814.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.234375, "loss": 3.9653077125549316 }, { "id": "identity_317571", "image": "GEOS/images/064.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure above, if the measure of an angle is 40*\\degree and the length of an arc is 4*\\pi, what is the area of a sector?\nChoices:\n(A) 4*pi\n(B) 16*pi\n(C) 36*pi\n(D) 128*pi\n(E) 324*pi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.953125, "loss": 2.440570116043091 }, { "id": "identity_40818", "image": "FigureQA/images/5262.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 30.234375, "loss": 3.5311386585235596 }, { "id": "identity_254037", "image": "Super-CLEVR/images/superCLEVR_new_001469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle that is directly in front of the tiny purple metal bus and to the right of the green and gray van?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 28.40625, "loss": 4.077909469604492 }, { "id": "identity_82201", "image": "Geometry3K/images/1322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle x in degrees?" }, { "from": "gpt", "value": "The answer is x = 90°" } ], "clip_score": 35.5, "loss": 1.6417888402938843 }, { "id": "identity_57944", "image": "DVQA/images/bar_train_00176602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer 'sewage' across all categories?" }, { "from": "gpt", "value": "The answer is 26" } ], "clip_score": 26.34375, "loss": 2.4627928733825684 }, { "id": "identity_40636", "image": "FigureQA/images/53009.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have an equal distribution of colors?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.0, "loss": 3.590494155883789 }, { "id": "identity_93143", "image": "TabMWP/images/5019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what time does the bus depart from the park?" }, { "from": "gpt", "value": "The answer is 1:55 P.M" } ], "clip_score": 29.203125, "loss": 1.7616907358169556 }, { "id": "identity_20930", "image": "GeoQA+/images/1364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,PA,PB切⊙O于点A、B,点C是⊙O上一点,且∠P=40°,则∠ACB的大小是()\nChoices:\n(A) 80°\n(B) 70°\n(C) 60°\n(D) 50°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.984375, "loss": 2.7256460189819336 }, { "id": "identity_53879", "image": "FigureQA/images/83769.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Dodger Blue have a data point at 80 on the x-axis?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.484375, "loss": 3.073164224624634 }, { "id": "identity_217951", "image": "PMC-VQA/images/PMC6276199_Fig2_406699.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this MRI image taken before or after contrast administration?" }, { "from": "gpt", "value": "The answer is Pre-contrast MRI image" } ], "clip_score": 33.40625, "loss": 2.608133554458618 }, { "id": "identity_31545", "image": "DVQA/images/bar_train_00022309.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the first bar from the left?" }, { "from": "gpt", "value": "The answer is stress" } ], "clip_score": 20.828125, "loss": 3.8261139392852783 }, { "id": "identity_4095", "image": "FigureQA/images/82092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Khaki greater than Orchid?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.6875, "loss": 4.021373748779297 }, { "id": "identity_160747", "image": "GeoQA+/images/4526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AED in the given square ABCD?" }, { "from": "gpt", "value": "The answer is 65 degrees" } ], "clip_score": 36.6875, "loss": 2.396303176879883 }, { "id": "identity_303171", "image": "DVQA/images/bar_train_00179293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars each a single solid color without any patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 20.109375, "loss": 3.4898452758789062 }, { "id": "identity_76531", "image": "TQA/images/question_images/human_system_digestive_1395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organ is directly connected to the mouth for the passage of food?" }, { "from": "gpt", "value": "The answer is Esophagus" } ], "clip_score": 31.796875, "loss": 1.8088126182556152 }, { "id": "identity_42761", "image": "PlotQA/images/33768.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year was the amount of aid received in 2012 US$ the lowest?" }, { "from": "gpt", "value": "The answer is 2000" } ], "clip_score": 21.59375, "loss": 1.8173359632492065 }, { "id": "identity_113538", "image": "VQA2.0/images/444809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the skateboarder's hat?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 25.265625, "loss": 3.208564281463623 }, { "id": "identity_335580", "image": "DocVQA/images/njdv0228_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: When is the honorarium dated?" }, { "from": "gpt", "value": "The answer is JUNE, 1976" } ], "clip_score": 32.25, "loss": 2.2239816188812256 }, { "id": "identity_62372", "image": "DocVQA/images/zjcc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of William J. Darby's tenure as a Professor of Medicine in Nutrition at Vanderbilt University School of Medicine?" }, { "from": "gpt", "value": "The answer is 1965 - [Current Year" } ], "clip_score": 28.484375, "loss": 3.080197811126709 }, { "id": "identity_52769", "image": "DocVQA/images/mkvw0217_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the company under review for the 'Totelle Combined slide kit'?" }, { "from": "gpt", "value": "The answer is DesignWrite" } ], "clip_score": 14.3203125, "loss": 3.0886037349700928 }, { "id": "identity_38280", "image": "A-OKVQA/images/294758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of company is sponsoring this game?\nChoices:\n(A) computer\n(B) car\n(C) basketball\n(D) canned bean" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.1484375, "loss": 4.186125755310059 }, { "id": "identity_172201", "image": "IconQA/images/19310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of triangles in the top row to the total number of triangles?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 28.046875, "loss": 1.555842399597168 }, { "id": "identity_125190", "image": "Super-CLEVR/images/superCLEVR_new_001340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the car's roof?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 20.109375, "loss": 4.287996768951416 }, { "id": "identity_225370", "image": "DocVQA/images/skng0065_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total pages were scanned and confirmed, and what was the total duration of the transmission?" }, { "from": "gpt", "value": "The answer is 1 page scanned, 1 page confirmed, 30 seconds duration" } ], "clip_score": 27.671875, "loss": 2.0474061965942383 }, { "id": "identity_96617", "image": "TabMWP/images/36621.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the number of lemons doubles on each branch, how many lemons are on 2 branches?" }, { "from": "gpt", "value": "The answer is 8 lemons" } ], "clip_score": 32.90625, "loss": 1.8327105045318604 }, { "id": "identity_82681", "image": "GeoQA+/images/3183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Xiao Ming's school at point B is 200 meters away from station O, what is the distance from point B to the north-south line passing through station O?" }, { "from": "gpt", "value": "The answer is approximately 197.36 meters" } ], "clip_score": 31.125, "loss": 2.0389325618743896 }, { "id": "identity_70785", "image": "TQA/images/abc_question_images/aquifers_16522.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the surface water?" }, { "from": "gpt", "value": "The answer is R" } ], "clip_score": 28.6875, "loss": 3.0800251960754395 }, { "id": "identity_47674", "image": "DVQA/images/bar_train_00064068.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for \"fear\"?" }, { "from": "gpt", "value": "The answer is mouth" } ], "clip_score": 23.9375, "loss": 3.4743149280548096 }, { "id": "identity_265451", "image": "AI2D/images/3703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following options is both elastic and used to collect liquid wastes for excretion? \n(A) a\n(B) k\n(C) c\n(D) d" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.3359375, "loss": 4.213000297546387 }, { "id": "identity_98242", "image": "TabMWP/images/28083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of apples picked between the day with the highest yield and the day with the lowest yield?" }, { "from": "gpt", "value": "The answer is 3,066" } ], "clip_score": 38.15625, "loss": 2.4378623962402344 }, { "id": "identity_30755", "image": "TextVQA/images/c36f2e8213322be5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what does the black sign say in front of the pillar?" }, { "from": "gpt", "value": "The answer is ancient egypt book of the dead" } ], "clip_score": 18.109375, "loss": 3.530959367752075 }, { "id": "identity_5203", "image": "DVQA/images/bar_train_00091783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.0, "loss": 3.369765520095825 }, { "id": "identity_309439", "image": "GeoQA+/images/1533.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,四边形ABCD是菱形,对角线相交于点O,AC=8,BD=6,点E是CD上一点,连接OE,若OE=CE,则OE的长是()\n选项:\n(A) 2\n(B) 2.5\n(C) 3\n(D) 4" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 2.6496238708496094 }, { "id": "identity_252931", "image": "A-OKVQA/images/395046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the indicators in the image that suggest the people are touring rather than commuting, eating food, or learning driving?" }, { "from": "gpt", "value": "The answer is The steam locomotive and scenic surroundings suggest touring" } ], "clip_score": 31.1875, "loss": 2.8615269660949707 }, { "id": "identity_283767", "image": "Geometry3K/images/1307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 7.\nOptions:\n(A) 30\n(B) 37\n(C) 49\n(D) 53" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.09375, "loss": 2.9416990280151367 }, { "id": "identity_314105", "image": "PlotQA/images/142987.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total electricity produced in this year in New Zealand?" }, { "from": "gpt", "value": "The answer is 11.7003827227993" } ], "clip_score": 23.375, "loss": 2.726961612701416 }, { "id": "identity_136338", "image": "FigureQA/images/38285.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of colors used in the pie chart?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.375, "loss": 2.0790584087371826 }, { "id": "identity_124002", "image": "PMC-VQA/images/PMC8618346_viruses-13-02109-f003_121105.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other colors present in the segmented 3D volume besides red, yellow, and blue?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 19.546875, "loss": 2.903193712234497 }, { "id": "identity_305022", "image": "TQA/images/question_images/parts_worm_7315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the end of this organism called?\nChoices:\n(A) male pore\n(B) prostomium\n(C) genital markings\n(D) clitellum" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.09375, "loss": 4.634384632110596 }, { "id": "identity_300401", "image": "UniGeo/images/1290.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,PA与⊙O相切于点A,OP与⊙O相交于点C,连接CB,∠OPA=40°,则∠ABC的度数是多少?\nChoices:\n(A) 20°\n(B) 25°\n(C) 30°\n(D) 35°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.421875, "loss": 2.572497606277466 }, { "id": "identity_104643", "image": "A-OKVQA/images/171479.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many athletes are visible in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.984375, "loss": 4.411374092102051 }, { "id": "identity_259898", "image": "TextVQA/images/03f3ed4c05b799d5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name of the organization in charge of waste management, and what region do they serve?" }, { "from": "gpt", "value": "The answer is Agglomération de Rouen, Haute Normandie" } ], "clip_score": 29.046875, "loss": 1.9106913805007935 }, { "id": "identity_132973", "image": "VQA2.0/images/580919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the woman's sweater in the center of the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 15.1171875, "loss": 3.6524524688720703 }, { "id": "identity_153872", "image": "TQA/images/abc_question_images/optics_reflection_19182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label in the diagram represents the normal line?" }, { "from": "gpt", "value": "The answer is the pink line" } ], "clip_score": 33.96875, "loss": 3.0086450576782227 }, { "id": "identity_263134", "image": "Geometry3K/images/1904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In triangle ABC, H is the incenter. Determine the length of BD.\nChoices:\n(A) 7√3\n(B) 7\n(C) 7√3\n(D) 14" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 34.5625, "loss": 2.9080605506896973 }, { "id": "identity_115674", "image": "TextVQA/images/0612be26dc019b40.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the bank advertised on the left side of the scoreboard?" }, { "from": "gpt", "value": "The answer is KeyBank" } ], "clip_score": 24.796875, "loss": 2.6153416633605957 }, { "id": "identity_180208", "image": "TabMWP/images/6612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of sheep owned by all farmers combined?" }, { "from": "gpt", "value": "The answer is 283" } ], "clip_score": 36.0, "loss": 1.928272008895874 }, { "id": "identity_138660", "image": "DVQA/images/bar_train_00071692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the second group of bars from the top?" }, { "from": "gpt", "value": "The answer is member" } ], "clip_score": 22.546875, "loss": 4.286175727844238 }, { "id": "identity_184206", "image": "PMC-VQA/images/PMC7795176_polymers-13-00026-f004_338529.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level indicated on the image?" }, { "from": "gpt", "value": "The answer is 40x" } ], "clip_score": 24.140625, "loss": 2.1260757446289062 }, { "id": "identity_210170", "image": "VizWiz/images/VizWiz_train_00018739.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of beer is shown in the image?" }, { "from": "gpt", "value": "The answer is Non-alcoholic" } ], "clip_score": 22.1875, "loss": 3.003113269805908 }, { "id": "identity_270905", "image": "TabMWP/images/30202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs that visited the veterinarian's office last month weighed 54 pounds or more?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 32.03125, "loss": 1.92569100856781 }, { "id": "identity_15199", "image": "GeoQA+/images/5112.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC内接于⊙O,若∠OBC=20°,则∠A的度数为()\nChoices:\n(A) 40°\n(B) 60°\n(C) 70°\n(D) 80°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.984375, "loss": 2.8381147384643555 }, { "id": "identity_141888", "image": "DocVQA/images/tkhg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of The Nutrition Foundation, Inc.?" }, { "from": "gpt", "value": "The answer is NEW YORK 17, N. Y" } ], "clip_score": 21.5625, "loss": 1.7841944694519043 }, { "id": "identity_107759", "image": "VQA-RAD/images/synpic19114.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of a pneumothorax in this chest X-ray?" }, { "from": "gpt", "value": "The answer is No pneumothorax present" } ], "clip_score": 33.25, "loss": 2.6895930767059326 }, { "id": "identity_104512", "image": "VQA2.0/images/278219.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the mouse on the desk?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 18.734375, "loss": 3.007167339324951 }, { "id": "identity_214838", "image": "PMC-VQA/images/PMC6373364_Fig1_435169.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label marked at the top left corner of the image?" }, { "from": "gpt", "value": "The answer is 1A" } ], "clip_score": 21.6875, "loss": 2.1964168548583984 }, { "id": "identity_291535", "image": "PMC-VQA/images/PMC4222226_F1_333338.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI slice was utilized to examine the patient out of the following options:\n(A) T1-weighted axial slice\n(B) T2-weighted coronal slice\n(C) T1-weighted sagittal slice\n(D) FLAIR coronal slice" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.53125, "loss": 4.184572696685791 }, { "id": "identity_240839", "image": "GeoQA+/images/5526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点O是△ABC外接圆的圆心,连接OB,若∠1=37°,则∠AOC的度数是多少?" }, { "from": "gpt", "value": "The answer is 106°" } ], "clip_score": 31.59375, "loss": 2.0002641677856445 }, { "id": "identity_302610", "image": "VQA-RAD/images/synpic48714.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type is the hemorrhage?" }, { "from": "gpt", "value": "The answer is Subarachnoid" } ], "clip_score": 23.78125, "loss": 1.985917568206787 }, { "id": "identity_282819", "image": "TabMWP/images/35593.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of glass bottles recycled by the recycling center on Tuesday and Wednesday, based on the log of the past 3 days?" }, { "from": "gpt", "value": "The answer is 4,665" } ], "clip_score": 42.03125, "loss": 2.294512987136841 }, { "id": "identity_242316", "image": "DocVQA/images/frvf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address and contact information of the Division of Professional Registration mentioned in the given page?" }, { "from": "gpt", "value": "The answer is 3523 North Ten Mile Drive, PO Box 1335, Jefferson City, MO 65101, Telephone: 314-751-2334" } ], "clip_score": 26.96875, "loss": 1.743699073791504 }, { "id": "identity_49712", "image": "ChartQA/images/multi_col_40962.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did 30 Mbps have the lowest coverage percentage?" }, { "from": "gpt", "value": "The answer is 2018" } ], "clip_score": 26.421875, "loss": 1.7134579420089722 }, { "id": "identity_196522", "image": "A-OKVQA/images/17882.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car parked closest to the bike rack?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 30.515625, "loss": 3.2220656871795654 }, { "id": "identity_108684", "image": "TextVQA/images/427908f7605df621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is the title of the book written in?" }, { "from": "gpt", "value": "The answer is Portuguese" } ], "clip_score": 23.078125, "loss": 4.323892116546631 }, { "id": "identity_338559", "image": "FigureQA/images/29061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the minimum value for Light Coral?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.265625, "loss": 3.998852252960205 }, { "id": "identity_263707", "image": "PMC-VQA/images/PMC9222192_children-09-00878-f003_319925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which staining method was utilized to visualize the left side of the proximal tibia?\nChoices:\n(A) Hematoxylin and eosin staining\n(B) Immunofluorescence staining\n(C) Giemsa staining\n(D) Periodic acid-Schiff staining" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.875, "loss": 3.760538101196289 }, { "id": "identity_248912", "image": "TextVQA/images/f8bc673eb5663a49.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years are covered by the display, and what significant global event occurred during this period?" }, { "from": "gpt", "value": "The answer is 10 years, World War II" } ], "clip_score": 21.8125, "loss": 3.270153284072876 }, { "id": "identity_311291", "image": "FigureQA/images/68969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Violet Red?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.0, "loss": 3.964136838912964 }, { "id": "identity_276126", "image": "FigureQA/images/57737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Aqua not the low median?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.75, "loss": 4.051761627197266 }, { "id": "identity_39133", "image": "DVQA/images/bar_train_00036223.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between \"cancer\" and \"score\" for the category labeled \"drink\"?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.015625, "loss": 2.461252212524414 }, { "id": "identity_84181", "image": "UniGeo/images/226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If line a is parallel to line b, and the measure of angle 1 is 85 degrees, what is the sum of the measures of angle 1 and angle 3?" }, { "from": "gpt", "value": "The answer is 180 degrees" } ], "clip_score": 31.890625, "loss": 1.5092335939407349 }, { "id": "identity_252286", "image": "A-OKVQA/images/390515.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the bench the man to the right of the black vehicle is laying on, and how many vertical bars does the bench have?" }, { "from": "gpt", "value": "The answer is Black, 3" } ], "clip_score": 26.34375, "loss": 4.176799774169922 }, { "id": "identity_186363", "image": "PMC-VQA/images/PMC8044272_Fig1_417220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What potential complications can arise from the location of the foreign body?" }, { "from": "gpt", "value": "The answer is Compression or damage to the spinal cord" } ], "clip_score": 20.015625, "loss": 3.118978261947632 }, { "id": "identity_34844", "image": "PMC-VQA/images/PMC5727375_F1_252022.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the plaque located in the patient's carotid artery? \nChoices:\n(A) Left internal carotid artery \n(B) Right internal carotid artery \n(C) Left external carotid artery \n(D) Right external carotid artery " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.546875, "loss": 3.8379058837890625 }, { "id": "identity_166118", "image": "IconQA/images/84247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple hearts are there in each row?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.71875, "loss": 2.0352609157562256 }, { "id": "identity_82962", "image": "GeoQA+/images/5225.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, if ∠BAC = 90° and AC = 8, find the measure of ∠ACB" }, { "from": "gpt", "value": "The answer is arctan(4/3)" } ], "clip_score": 32.75, "loss": 2.2154617309570312 }, { "id": "identity_13465", "image": "TabMWP/images/37745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To get to know her students better, Ms. Becker surveyed her math students to determine what hobbies and school subjects they prefer. What is the probability that a randomly selected student enjoys poetry and prefers literature? Simplify any fractions." }, { "from": "gpt", "value": "The answer is 3/16" } ], "clip_score": 26.4375, "loss": 1.845043659210205 }, { "id": "identity_159762", "image": "GeoQA+/images/4847.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment DG?" }, { "from": "gpt", "value": "The answer is 10 m" } ], "clip_score": 29.234375, "loss": 3.300414800643921 }, { "id": "identity_207387", "image": "TextVQA/images/7e1c64432aef42fb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of accessory is prominently featured in the bottom left corner?" }, { "from": "gpt", "value": "The answer is Perfume bottle" } ], "clip_score": 28.96875, "loss": 2.4562606811523438 }, { "id": "identity_137524", "image": "FigureQA/images/25771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which color has the second smallest value on the graph?" }, { "from": "gpt", "value": "The answer is Indian Red" } ], "clip_score": 29.6875, "loss": 3.233475923538208 }, { "id": "identity_205619", "image": "A-OKVQA/images/98345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main mode of transportation in this image?" }, { "from": "gpt", "value": "The answer is Boat" } ], "clip_score": 24.859375, "loss": 2.9057047367095947 }, { "id": "identity_92021", "image": "TabMWP/images/13760.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pets owned by the students?" }, { "from": "gpt", "value": "The answer is 124" } ], "clip_score": 32.5625, "loss": 2.2183713912963867 }, { "id": "identity_96152", "image": "TabMWP/images/3541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Tiana buys two pairs of shorts, how much change will she receive from $10.00?" }, { "from": "gpt", "value": "The answer is $2.00" } ], "clip_score": 33.0625, "loss": 1.5787813663482666 }, { "id": "identity_274702", "image": "TextVQA/images/df7966f0d306102e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which sport is depicted on the shirt?" }, { "from": "gpt", "value": "The answer is lacrosse" } ], "clip_score": 26.953125, "loss": 3.0069477558135986 }, { "id": "identity_194389", "image": "PMC-VQA/images/PMC5168473_fig3_106342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What vertebral level is labeled in the image?" }, { "from": "gpt", "value": "The answer is S2" } ], "clip_score": 26.765625, "loss": 2.420273780822754 }, { "id": "identity_130573", "image": "Super-CLEVR/images/superCLEVR_new_001007.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cruiser that is to the right of the articulated bus?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 25.78125, "loss": 4.836052417755127 }, { "id": "identity_211679", "image": "PMC-VQA/images/PMC5550971_RSIF20170203F1_201629.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the object have any areas with the value of 0.75000 according to the color scale?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.109375, "loss": 3.1264121532440186 }, { "id": "identity_92000", "image": "IconQA/images/40035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hours and minutes displayed on the clock?" }, { "from": "gpt", "value": "The answer is 6 hours and 0 minutes" } ], "clip_score": 33.8125, "loss": 1.3865399360656738 }, { "id": "identity_139607", "image": "DVQA/images/bar_train_00153639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of units sold per store for the item 'model'?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 27.8125, "loss": 2.0026872158050537 }, { "id": "identity_215235", "image": "TextVQA/images/4657c31bd7744cfc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of this malt beverage?" }, { "from": "gpt", "value": "The answer is Angel's Share" } ], "clip_score": 23.9375, "loss": 2.08477783203125 }, { "id": "identity_170788", "image": "TabMWP/images/25094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Sarah buys 3 kilograms of Bartlett pears and 2 kilograms of Osband pears, what is the average cost per kilogram?" }, { "from": "gpt", "value": "The answer is $1.90" } ], "clip_score": 28.46875, "loss": 2.0392022132873535 }, { "id": "identity_17932", "image": "GeoQA+/images/3739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的弦,OC⊥AB,交⊙O于点C,连接OA,OB,BC,若∠ABC=20°,则∠BAO的度数是()\nChoices:\n(A) 40°\n(B) 45°\n(C) 50°\n(D) 55°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.0, "loss": 2.762183904647827 }, { "id": "identity_259048", "image": "A-OKVQA/images/135244.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps to determine the type of fuel used by this plane, and how does it compare to other types of planes in terms of fuel efficiency and environmental impact?" }, { "from": "gpt", "value": "The answer is Gasoline is less efficient and more polluting than electricity" } ], "clip_score": 20.921875, "loss": 3.2159790992736816 }, { "id": "identity_114080", "image": "PMC-VQA/images/PMC7668509_F3_298995.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which tooth has been replaced by the dental implant in the radiograph?" }, { "from": "gpt", "value": "The answer is First molar" } ], "clip_score": 34.53125, "loss": 3.0989089012145996 }, { "id": "identity_204849", "image": "TextVQA/images/1cce765ca9bd7643.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the street where the store and billboard are located?" }, { "from": "gpt", "value": "The answer is Sunny Brow Road" } ], "clip_score": 21.953125, "loss": 4.1313371658325195 }, { "id": "identity_211214", "image": "A-OKVQA/images/530013.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the logo on the field near the batter's box?" }, { "from": "gpt", "value": "The answer is A red \"C\" with a white outline" } ], "clip_score": 28.46875, "loss": 3.12319278717041 }, { "id": "identity_97804", "image": "TabMWP/images/21603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students scored between 70 and 79 points inclusive?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.328125, "loss": 2.1428346633911133 }, { "id": "identity_311989", "image": "Super-CLEVR/images/superCLEVR_new_001777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more large blue things that are to the right of a red object than large metallic aeroplanes?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 19.921875, "loss": 5.1992106437683105 }, { "id": "identity_49012", "image": "FigureQA/images/50458.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the Pale Green section less than 30% of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 39.75, "loss": 3.189627170562744 }, { "id": "identity_247080", "image": "TextVQA/images/fbf53aa4f8db3949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different companies have advertisements in this image, and what are their names?" }, { "from": "gpt", "value": "The answer is Five companies: BMK, Agfa, Polaroid, Mido, Watches of Switzerland" } ], "clip_score": 23.71875, "loss": 2.198636054992676 }, { "id": "identity_187846", "image": "PMC-VQA/images/PMC5108780_F5_95231.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any annotations indicating the orientation of the tissue?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.34375, "loss": 3.241955518722534 }, { "id": "identity_315501", "image": "VQA-RAD/images/synpic27601.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where in relation to the mass is the left kidney located in this?" }, { "from": "gpt", "value": "The answer is posteriorly" } ], "clip_score": 28.03125, "loss": 5.113389492034912 }, { "id": "identity_140287", "image": "MapQA/images/map_4133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the darkest color representing the highest percentage of advisers?" }, { "from": "gpt", "value": "The answer is California" } ], "clip_score": 35.84375, "loss": 2.3472800254821777 }, { "id": "identity_231661", "image": "DVQA/images/bar_train_00060869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the highest and the lowest values in the bar chart?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 29.90625, "loss": 2.7909305095672607 }, { "id": "identity_295288", "image": "ChartQA/images/multi_col_100782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the price of a smartphone is 50% of $1000, what will be the price of a computer/tablet at 57%?" }, { "from": "gpt", "value": "The answer is 1140" } ], "clip_score": 25.0625, "loss": 2.723284959793091 }, { "id": "identity_70663", "image": "TQA/images/abc_question_images/human_system_reproductory_17042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the seminal vesicle?" }, { "from": "gpt", "value": "The answer is Label V" } ], "clip_score": 23.46875, "loss": 4.3700714111328125 }, { "id": "identity_113269", "image": "VQA2.0/images/9029.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity are the majority of the people engaged in?" }, { "from": "gpt", "value": "The answer is Waiting or queuing" } ], "clip_score": 14.9296875, "loss": 4.04287576675415 }, { "id": "identity_283376", "image": "ScienceQA/images/12051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which sample has a higher temperature based on the comparison of the average kinetic energies of the particles in each sample?\n(A) sample B\n(B) neither; the samples have the same temperature\n(C) sample A" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.09375, "loss": 3.101299285888672 }, { "id": "identity_131363", "image": "TextVQA/images/01e3e2e8f8e6808c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the zip code for Amity Street?" }, { "from": "gpt", "value": "The answer is 11201" } ], "clip_score": 9.546875, "loss": 1.958469033241272 }, { "id": "identity_293585", "image": "VQA2.0/images/182863.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a rug of significant size on the floor?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.34375, "loss": 3.6025032997131348 }, { "id": "identity_277813", "image": "VizWiz/images/VizWiz_train_00007997.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is glass" } ], "clip_score": 25.390625, "loss": 3.3649704456329346 }, { "id": "identity_314205", "image": "Geometry3K/images/446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Use the given shape to determine m \\angle B D C.\nChoices:\n(A) 32\n(B) 40.1\n(C) 52\n(D) 87.9" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 33.75, "loss": 3.0697803497314453 }, { "id": "identity_203944", "image": "A-OKVQA/images/32773.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Overcast" } ], "clip_score": 14.5546875, "loss": 2.6436893939971924 }, { "id": "identity_117559", "image": "A-OKVQA/images/260901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many water towers are present in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.265625, "loss": 2.3760976791381836 }, { "id": "identity_102746", "image": "TabMWP/images/23453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of offices per floor?" }, { "from": "gpt", "value": "The answer is 35 offices" } ], "clip_score": 35.25, "loss": 3.2929234504699707 }, { "id": "identity_181771", "image": "CLEVR-Math/images/CLEVR_train_059945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of objects in the image?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 21.453125, "loss": 2.0014264583587646 }, { "id": "identity_151349", "image": "ScienceQA/images/5617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many poles are labeled on each magnet?" }, { "from": "gpt", "value": "The answer is Two poles on each magnet" } ], "clip_score": 27.046875, "loss": 2.494781017303467 }, { "id": "identity_131971", "image": "A-OKVQA/images/111259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What safety equipment can be seen worn by the individuals?" }, { "from": "gpt", "value": "The answer is Helmets and life jackets" } ], "clip_score": 21.03125, "loss": 1.9019889831542969 }, { "id": "identity_1945", "image": "VQA2.0/images/581770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many engines can be seen at this angle?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 16.359375, "loss": 2.444385528564453 }, { "id": "identity_176444", "image": "IconQA/images/1396.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are in the image in total?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 26.5625, "loss": 2.078566312789917 }, { "id": "identity_250690", "image": "DocVQA/images/tshd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full name and title of the person who wrote this letter?" }, { "from": "gpt", "value": "The answer is Robert J. Hickock, Assistant Vice Chancellor for Medical Affairs" } ], "clip_score": 29.875, "loss": 2.8466298580169678 }, { "id": "identity_292058", "image": "TabMWP/images/4572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of watermelons harvested by the farmer in the last 6 days?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.15625, "loss": 3.8915491104125977 }, { "id": "identity_241082", "image": "VizWiz/images/VizWiz_train_00014628.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand and series of the speaker shown in the image, and what is the total number of letters in the brand and series names combined?" }, { "from": "gpt", "value": "The answer is 22" } ], "clip_score": 19.734375, "loss": 3.8060641288757324 }, { "id": "identity_224982", "image": "TabMWP/images/25050.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of cartwheels done by the gymnast over the five days, and how does it compare to the mean?" }, { "from": "gpt", "value": "The answer is The median is 6, and it is equal to the mean" } ], "clip_score": 33.9375, "loss": 2.2456743717193604 }, { "id": "identity_32269", "image": "A-OKVQA/images/524557.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Why is one kid wearing yellow?\nChoices:\n(A) umpire\n(B) water boy\n(C) goalie\n(D) referee" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.328125, "loss": 3.9882493019104004 }, { "id": "identity_133804", "image": "Super-CLEVR/images/superCLEVR_new_001444.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 22.265625, "loss": 2.39435076713562 }, { "id": "identity_160485", "image": "GeoQA+/images/3419.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the area of triangle CDE is 10, what is the area of triangle CDB?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 30.125, "loss": 1.758963942527771 }, { "id": "identity_210870", "image": "VQA2.0/images/473842.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is shown in the image?" }, { "from": "gpt", "value": "The answer is Motorcycle" } ], "clip_score": 26.875, "loss": 3.276658058166504 }, { "id": "identity_49811", "image": "DocVQA/images/klvg0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the city and state where the On The Run Franchise is located?" }, { "from": "gpt", "value": "The answer is FAIRFAX, VA 22037" } ], "clip_score": 21.796875, "loss": 1.4755336046218872 }, { "id": "identity_40057", "image": "PlotQA/images/40831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the percentage of trade with economies outside the region higher for Sudan in merchandise imports or exports?" }, { "from": "gpt", "value": "The answer is Merchandise exports" } ], "clip_score": 31.6875, "loss": 2.205019474029541 }, { "id": "identity_66587", "image": "TQA/images/question_images/erosion_6862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the opening at the top of the cliff connected to the sea cave?" }, { "from": "gpt", "value": "The answer is blow hole" } ], "clip_score": 30.15625, "loss": 3.3458926677703857 }, { "id": "identity_126196", "image": "VQA2.0/images/545447.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather condition in the photo?" }, { "from": "gpt", "value": "The answer is Partly cloudy" } ], "clip_score": 21.03125, "loss": 2.3588128089904785 }, { "id": "identity_186727", "image": "A-OKVQA/images/192955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animal is harnessed to the train?" }, { "from": "gpt", "value": "The answer is Horse" } ], "clip_score": 25.28125, "loss": 2.3942792415618896 }, { "id": "identity_176626", "image": "CLEVR-Math/images/CLEVR_train_045900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.34375, "loss": 2.551513910293579 }, { "id": "identity_180803", "image": "CLEVR-Math/images/CLEVR_train_058552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are larger than the purple sphere?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.328125, "loss": 2.573906898498535 }, { "id": "identity_45927", "image": "ChartQA/images/multi_col_40268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in average daily time spent on social media between 2012 and 2017 in Q1?" }, { "from": "gpt", "value": "The answer is 20 minutes" } ], "clip_score": 28.71875, "loss": 2.0292062759399414 }, { "id": "identity_306975", "image": "VQA2.0/images/119108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the ground wet?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 16.75, "loss": 4.493509769439697 }, { "id": "identity_135564", "image": "ChartQA/images/multi_col_20496.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between morally acceptable and morally wrong views in 2015?" }, { "from": "gpt", "value": "The answer is 29%" } ], "clip_score": 29.734375, "loss": 2.1879379749298096 }, { "id": "identity_190463", "image": "VQA2.0/images/10711.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the catcher's uniform?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 19.859375, "loss": 3.8286960124969482 }, { "id": "identity_266641", "image": "TabMWP/images/33792.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of pictures taken by the newspaper photographer over the past 7 days?" }, { "from": "gpt", "value": "The answer is 97" } ], "clip_score": 34.4375, "loss": 2.7876946926116943 }, { "id": "identity_299102", "image": "TextVQA/images/47b78464e24b22f6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What kind of watch is this?" }, { "from": "gpt", "value": "The answer is harley davidson" } ], "clip_score": 35.59375, "loss": 2.035331964492798 }, { "id": "identity_176292", "image": "TabMWP/images/8783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If a customer buys 2 silver necklaces and 1 jade necklace, how much change will they receive from a $2000 bill?" }, { "from": "gpt", "value": "The answer is $770.00" } ], "clip_score": 34.84375, "loss": 1.9518994092941284 }, { "id": "identity_65882", "image": "ScienceQA/images/10212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 31.46875, "loss": 2.1369779109954834 }, { "id": "identity_147062", "image": "DocVQA/images/qyfg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the person to whom the letter is addressed?" }, { "from": "gpt", "value": "The answer is Robert E. Shank" } ], "clip_score": 23.671875, "loss": 2.5114259719848633 }, { "id": "identity_48888", "image": "DVQA/images/bar_train_00043413.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between the item with the highest sales and the item with the second highest sales?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 31.921875, "loss": 2.736997604370117 }, { "id": "identity_315415", "image": "Super-CLEVR/images/superCLEVR_new_000235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a tiny object of the same color as this?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 13.46875, "loss": 4.585840225219727 }, { "id": "identity_202943", "image": "TextVQA/images/9e49151c28fe87ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the movie being advertised at the bottom of the poster?" }, { "from": "gpt", "value": "The answer is LA REINA CONDENADOS" } ], "clip_score": 21.15625, "loss": 3.3543477058410645 }, { "id": "identity_197231", "image": "VQA2.0/images/454745.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the motorcycle?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 28.484375, "loss": 3.3460209369659424 }, { "id": "identity_123956", "image": "Super-CLEVR/images/superCLEVR_new_001221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 24.6875, "loss": 4.635794639587402 }, { "id": "identity_83127", "image": "GeoQA+/images/5683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠1 is 42°, what is the measure of the angle formed by extending line OB in the direction of point A?" }, { "from": "gpt", "value": "The answer is 138°" } ], "clip_score": 32.125, "loss": 1.5529494285583496 }, { "id": "identity_84015", "image": "GeoQA+/images/373.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of line segment BM if AN is 2cm and point M is the midpoint of line segment AB?" }, { "from": "gpt", "value": "The answer is 6cm" } ], "clip_score": 26.0625, "loss": 2.5621211528778076 }, { "id": "identity_39552", "image": "FigureQA/images/14153.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sections does the pie chart have?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 33.71875, "loss": 2.5699050426483154 }, { "id": "identity_322719", "image": "DVQA/images/bar_train_00160490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the lowest accuracy for any of these datasets?" }, { "from": "gpt", "value": "The answer is planet" } ], "clip_score": 31.546875, "loss": 3.581528902053833 }, { "id": "identity_65695", "image": "ScienceQA/images/16998.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is being depicted in the diagram?" }, { "from": "gpt", "value": "The answer is Diffusion" } ], "clip_score": 29.78125, "loss": 2.7687180042266846 }, { "id": "identity_129691", "image": "VQA-AS/images/18440.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many curtains are visible in the window?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.46875, "loss": 2.5282697677612305 }, { "id": "identity_291567", "image": "DocVQA/images/nfgn0226_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you provide the name of the device?" }, { "from": "gpt", "value": "The answer is DePuy Pinnacle Metal-on-Metal Acetabular Cup Liners" } ], "clip_score": 21.984375, "loss": 1.8272250890731812 }, { "id": "identity_225356", "image": "TQA/images/question_images/hydrocarbons_8094.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique carbon environments are present in each of the given isomers of pentane?" }, { "from": "gpt", "value": "The answer is Pentane: 1, Isopentane: 3, Neopentane: 2" } ], "clip_score": 32.78125, "loss": 1.3938775062561035 }, { "id": "identity_11978", "image": "GeoQA+/images/3273.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在▱ABCD中,M是BC延长线上的一点,若∠A=135°,则∠MCD的度数是()\nChoices:\n(A) 45°\n(B) 55°\n(C) 65°\n(D) 75°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.5, "loss": 2.432096004486084 }, { "id": "identity_241756", "image": "ScienceQA/images/7862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony is located directly south of the colony shown in dark green?" }, { "from": "gpt", "value": "The answer is Massachusetts" } ], "clip_score": 33.59375, "loss": 3.2649776935577393 }, { "id": "identity_172770", "image": "IconQA/images/80172.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the total number of shapes are hearts?" }, { "from": "gpt", "value": "The answer is 1/1 or 1" } ], "clip_score": 29.9375, "loss": 2.2033209800720215 }, { "id": "identity_58761", "image": "DVQA/images/bar_train_00089909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did any item sell exactly 8 units in the 'blind' store?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.296875, "loss": 3.0441575050354004 }, { "id": "identity_5022", "image": "FigureQA/images/73099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Red greater than Forest Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.4375, "loss": 3.57893705368042 }, { "id": "identity_103052", "image": "CLEVR-Math/images/CLEVR_train_010778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different colors are present among the objects?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 26.53125, "loss": 2.037172317504883 }, { "id": "identity_169842", "image": "IconQA/images/62969.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more paper clips would be needed to measure a line that is twice as long?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.125, "loss": 2.254514217376709 }, { "id": "identity_239205", "image": "A-OKVQA/images/278528.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary natural element responsible for the movement of the flying object, and how does this element interact with the object to enable its flight?" }, { "from": "gpt", "value": "The answer is wind" } ], "clip_score": 19.609375, "loss": 4.7780280113220215 }, { "id": "identity_129055", "image": "VizWiz/images/VizWiz_train_00005486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of human activity in the image?" }, { "from": "gpt", "value": "The answer is No visible evidence of human activity" } ], "clip_score": 12.4375, "loss": 3.1140875816345215 }, { "id": "identity_74122", "image": "TQA/images/abc_question_images/human_system_eye_16133.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the eye is indicated by the label 'D'?" }, { "from": "gpt", "value": "The answer is Cornea" } ], "clip_score": 33.28125, "loss": 2.9158008098602295 }, { "id": "identity_8228", "image": "IconQA/images/68092.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If you select a marble without looking, how likely is it that you will pick a black one?\nChoices:\n(A) unlikely\n(B) certain\n(C) probable\n(D) impossible" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 22.375, "loss": 3.8823599815368652 }, { "id": "identity_186348", "image": "TextVQA/images/594b73dfed3eb809.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any runners in a lying down position?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.375, "loss": 3.089237928390503 }, { "id": "identity_137467", "image": "ChartQA/images/multi_col_60954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of Twitter followers in March 2017 and March 2018?" }, { "from": "gpt", "value": "The answer is 3.67 million" } ], "clip_score": 27.796875, "loss": 2.2648935317993164 }, { "id": "identity_215262", "image": "A-OKVQA/images/380248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any electronic devices on the bookshelf?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.671875, "loss": 3.2243564128875732 }, { "id": "identity_94323", "image": "TabMWP/images/6418.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the reporter interview the least number of people?" }, { "from": "gpt", "value": "The answer is Thursday and Friday" } ], "clip_score": 34.8125, "loss": 2.4799609184265137 }, { "id": "identity_308462", "image": "A-OKVQA/images/54178.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What elevation change do the seated individuals desire?\nChoices:\n(A) lower\n(B) none\n(C) same\n(D) higher" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 9.9453125, "loss": 3.6985230445861816 }, { "id": "identity_27395", "image": "TabMWP/images/17241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Sidney buys 3 pounds of ranch popcorn, how much will she spend?" }, { "from": "gpt", "value": "The answer is 11.19" } ], "clip_score": 31.1875, "loss": 2.52156400680542 }, { "id": "identity_190518", "image": "A-OKVQA/images/127074.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 14.703125, "loss": 2.151704788208008 }, { "id": "identity_277014", "image": "Super-CLEVR/images/superCLEVR_new_001361.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the blue bus made of, considering it is the same size as the red bus?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 21.15625, "loss": 3.4545280933380127 }, { "id": "identity_152738", "image": "ScienceQA/images/13369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state to the northwest of the highlighted state?" }, { "from": "gpt", "value": "The answer is Idaho" } ], "clip_score": 31.8125, "loss": 2.957738161087036 }, { "id": "identity_62297", "image": "DVQA/images/bar_train_00183982.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item 'lap'?" }, { "from": "gpt", "value": "The answer is 30 units" } ], "clip_score": 22.796875, "loss": 2.436880588531494 }, { "id": "identity_316166", "image": "TextVQA/images/1c37dca03d3afa3e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the coffee mug say they are trying to be today?" }, { "from": "gpt", "value": "The answer is awesome" } ], "clip_score": 35.875, "loss": 3.8729259967803955 }, { "id": "identity_62743", "image": "AI2D/images/1841.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary food source for cows as shown in the image?" }, { "from": "gpt", "value": "The answer is Grass" } ], "clip_score": 22.84375, "loss": 2.493767499923706 }, { "id": "identity_172221", "image": "TabMWP/images/16730.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries have between 31 and 40 athletes inclusive?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 19.5, "loss": 2.1956660747528076 }, { "id": "identity_237897", "image": "ChartQA/images/12194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much does the sum of Somewhat unfavorable, Favorable, and Don't know differ from the percentage of Very unfavorable?" }, { "from": "gpt", "value": "The answer is 6%" } ], "clip_score": 35.90625, "loss": 2.0679850578308105 }, { "id": "identity_273967", "image": "AI2D/images/647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which season is represented by the letter B?\nChoices:\n(A) Autumn\n(B) Summer\n(C) Winter\n(D) Spring" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.765625, "loss": 4.237864017486572 }, { "id": "identity_277508", "image": "DVQA/images/bar_train_00065508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars is labeled at the bottom?" }, { "from": "gpt", "value": "The answer is lay" } ], "clip_score": 21.0, "loss": 4.673361301422119 }, { "id": "identity_143730", "image": "DVQA/images/bar_train_00103031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does any algorithm have an accuracy of exactly 5 on the 'chain' dataset?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 34.4375, "loss": 2.854454278945923 }, { "id": "identity_197258", "image": "PMC-VQA/images/PMC4650277_Fig1_445147.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is primarily being visualized in this image?" }, { "from": "gpt", "value": "The answer is The heart" } ], "clip_score": 21.671875, "loss": 4.1877946853637695 }, { "id": "identity_230877", "image": "IconQA/images/57518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that the spinner will land on white, and how does it compare to the probability of landing on pink?" }, { "from": "gpt", "value": "The answer is The probability of landing on white is higher than on pink" } ], "clip_score": 34.09375, "loss": 2.2544569969177246 }, { "id": "identity_316050", "image": "FigureQA/images/68551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the area under the curve for Violet Red the smallest?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.671875, "loss": 3.8834328651428223 }, { "id": "identity_285548", "image": "VQA2.0/images/518966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What object is the woman currently holding in her hands?" }, { "from": "gpt", "value": "The answer is surfboard" } ], "clip_score": 31.703125, "loss": 2.277242660522461 }, { "id": "identity_167369", "image": "TabMWP/images/26293.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median score of the science test?" }, { "from": "gpt", "value": "The answer is (68 + 68) / 2 = 68" } ], "clip_score": 28.0625, "loss": 1.5942360162734985 }, { "id": "identity_182991", "image": "PMC-VQA/images/PMC7850656_F1_357393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of midline shift due to the abnormality?" }, { "from": "gpt", "value": "The answer is No evidence of midline shift" } ], "clip_score": 20.984375, "loss": 2.735638380050659 }, { "id": "identity_317165", "image": "VQA-RAD/images/synpic42951.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is observed in this image?" }, { "from": "gpt", "value": "The answer is small subdural hematoma with cerebral edema" } ], "clip_score": 34.46875, "loss": 2.5531511306762695 }, { "id": "identity_223916", "image": "FigureQA/images/25657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Blue Violet and Light Seafoam?" }, { "from": "gpt", "value": "The answer is 40%" } ], "clip_score": 34.75, "loss": 1.9604554176330566 }, { "id": "identity_131798", "image": "A-OKVQA/images/194421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible logos on the skateboarder's t-shirt?" }, { "from": "gpt", "value": "The answer is Yes, there is a visible logo on the t-shirt" } ], "clip_score": 25.953125, "loss": 1.6942472457885742 }, { "id": "identity_93066", "image": "IconQA/images/37479.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of images in the sequence?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 16.328125, "loss": 1.6649945974349976 }, { "id": "identity_209312", "image": "VQA2.0/images/579938.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the front of the red bus?" }, { "from": "gpt", "value": "The answer is 276" } ], "clip_score": 17.28125, "loss": 2.210085868835449 }, { "id": "identity_332613", "image": "GeoQA+/images/2347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AD是△ABC的角平分线,DF垂直于AB,垂足为F,DE等于DG,△ADG和△AED的面积分别为29和16,则△EDF的面积为()\n选项:\n(A) 13\n(B) 6.5\n(C) 11\n(D) 5.5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.46875, "loss": 2.3922104835510254 }, { "id": "identity_201376", "image": "PMC-VQA/images/PMC7166296_fig2_153917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique was used to produce this image?" }, { "from": "gpt", "value": "The answer is X-ray" } ], "clip_score": 30.0625, "loss": 1.9697856903076172 }, { "id": "identity_228628", "image": "ScienceQA/images/17796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which colony is located directly south of the colony shown in dark green?" }, { "from": "gpt", "value": "The answer is Connecticut" } ], "clip_score": 30.921875, "loss": 3.5219435691833496 }, { "id": "identity_302396", "image": "DVQA/images/bar_train_00072013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars has the smallest total value?" }, { "from": "gpt", "value": "The answer is island" } ], "clip_score": 27.0625, "loss": 2.7956206798553467 }, { "id": "identity_260273", "image": "A-OKVQA/images/246001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the main components of the setup in the classroom, and how do they contribute to the learning environment?" }, { "from": "gpt", "value": "The answer is Projector, laptops, headphones, clock, whiteboard, chairs, podium" } ], "clip_score": 34.53125, "loss": 2.5273122787475586 }, { "id": "identity_286899", "image": "ScienceQA/images/6411.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the passage discussing the weather or the climate? \nOptions:\n(A) weather\n(B) climate" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 8.53125, "loss": 4.792986869812012 }, { "id": "identity_249732", "image": "ScienceQA/images/1125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the state highlighted in the image and name its capital. Then, list the capitals of the states that border this state" }, { "from": "gpt", "value": "The answer is Cheyenne, Lincoln, Topeka, Oklahoma City, Santa Fe, Salt Lake City, Phoenix" } ], "clip_score": 35.125, "loss": 2.1743340492248535 }, { "id": "identity_824", "image": "FigureQA/images/73876.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Blue have the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.484375, "loss": 4.3062028884887695 }, { "id": "identity_327629", "image": "TextVQA/images/a9ec28c332244746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this a child?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.0546875, "loss": 3.381922960281372 }, { "id": "identity_315641", "image": "FigureQA/images/59221.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Forest Green the highest median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.75, "loss": 3.8096671104431152 }, { "id": "identity_68093", "image": "ScienceQA/images/1480.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural style are the buildings in the image?" }, { "from": "gpt", "value": "The answer is Traditional Yemeni architecture" } ], "clip_score": 29.921875, "loss": 3.1243882179260254 }, { "id": "identity_251323", "image": "A-OKVQA/images/268590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the plant from which the yellow item on the plate originates and describe its common culinary uses and health benefits" }, { "from": "gpt", "value": "The answer is Mustard, used in condiments, anti-inflammatory, aids digestion" } ], "clip_score": 14.6796875, "loss": 2.36142635345459 }, { "id": "identity_212294", "image": "A-OKVQA/images/344040.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is It is a clear and sunny day" } ], "clip_score": 18.65625, "loss": 2.311894178390503 }, { "id": "identity_337029", "image": "DocVQA/images/mgvh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the specified year for the contract?" }, { "from": "gpt", "value": "The answer is 7/1/79-6/30/80" } ], "clip_score": 19.921875, "loss": 1.6659499406814575 }, { "id": "identity_56113", "image": "MapQA/images/map_2250.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the percentage of liberals in California higher than in Nevada?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 30.046875, "loss": 3.223994016647339 }, { "id": "identity_317096", "image": "ChartQA/images/two_col_45.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which two groups have the largest number of mass shooters in the US?" }, { "from": "gpt", "value": "The answer is [White, Black]" } ], "clip_score": 25.25, "loss": 2.0189049243927 }, { "id": "identity_86828", "image": "GeoQA+/images/4181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In triangle ABC, AB=AC, and the angle bisector of ∠BAC intersects the perpendicular bisector of AB at point O. If ∠BAC = 58°, what is the measure of ∠DOC?" }, { "from": "gpt", "value": "The answer is 122°" } ], "clip_score": 33.8125, "loss": 1.7408169507980347 }, { "id": "identity_316744", "image": "FigureQA/images/42432.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this color less than Brown?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.390625, "loss": 4.017153739929199 }, { "id": "identity_125413", "image": "PMC-VQA/images/PMC8840569_sensors-22-00821-f001_193301.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any devices or medical interventions present in the image?" }, { "from": "gpt", "value": "The answer is Central venous catheter (CVC)" } ], "clip_score": 32.1875, "loss": 2.2783217430114746 }, { "id": "identity_269929", "image": "FigureQA/images/85025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Pale Green the color with the largest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.125, "loss": 4.073538780212402 }, { "id": "identity_196797", "image": "PMC-VQA/images/PMC7906016_F3_374088.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique is shown in the image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 31.453125, "loss": 2.6650328636169434 }, { "id": "identity_184227", "image": "VQA2.0/images/450052.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Cloudy" } ], "clip_score": 15.5390625, "loss": 2.772557497024536 }, { "id": "identity_54253", "image": "DVQA/images/bar_train_00179219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the \"mud\" category of the \"adult\" group and the \"mud\" category of the \"mood\" group?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.921875, "loss": 2.8012773990631104 }, { "id": "identity_56349", "image": "FigureQA/images/8515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the legend in the pie chart horizontal or vertical?" }, { "from": "gpt", "value": "The answer is Vertical" } ], "clip_score": 27.484375, "loss": 3.0568153858184814 }, { "id": "identity_231157", "image": "DocVQA/images/frjf0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different types of communications and reports required regarding TPNA pharmaceutical products, and how do they relate to reimbursement and pricing?" }, { "from": "gpt", "value": "The answer is Reports, communications, invoices, projections, profit statements, and meetings" } ], "clip_score": 22.96875, "loss": 3.3804006576538086 }, { "id": "identity_74163", "image": "ScienceQA/images/6449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state directly south of the highlighted state?" }, { "from": "gpt", "value": "The answer is Massachusetts" } ], "clip_score": 29.375, "loss": 3.8664469718933105 }, { "id": "identity_337619", "image": "TextVQA/images/10b03c7f215f04e8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: According to this, what should one be focused on?" }, { "from": "gpt", "value": "The answer is mathmatics" } ], "clip_score": 16.359375, "loss": 4.0658345222473145 }, { "id": "identity_334922", "image": "A-OKVQA/images/449103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the person in the black boots doing with the can?\nChoices:\n(A) throwing\n(B) buying\n(C) drinking\n(D) recycling" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.8125, "loss": 4.363205909729004 }, { "id": "identity_328992", "image": "A-OKVQA/images/529545.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who do these umbrellas belong to?\nChoices:\n(A) residents\n(B) store\n(C) city\n(D) restaurant" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.015625, "loss": 4.207708835601807 }, { "id": "identity_125137", "image": "A-OKVQA/images/48943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many stars are visible on the airplane's wing?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.484375, "loss": 2.454019784927368 }, { "id": "identity_68609", "image": "AI2D/images/4474.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of arch is shown on the right side of the image?" }, { "from": "gpt", "value": "The answer is Transverse arch" } ], "clip_score": 27.90625, "loss": 2.668835163116455 }, { "id": "identity_96204", "image": "TabMWP/images/17145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more tickets were sold on Friday compared to Tuesday?" }, { "from": "gpt", "value": "The answer is 221" } ], "clip_score": 33.71875, "loss": 2.4637603759765625 }, { "id": "identity_253200", "image": "PlotQA/images/19793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average employment to population ratio for Guinea from 1992 to 1999?" }, { "from": "gpt", "value": "The answer is 77.1" } ], "clip_score": 30.0, "loss": 2.334346294403076 }, { "id": "identity_28020", "image": "A-OKVQA/images/430125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What sort of area is the man eating pizza in?\nChoices:\n(A) cow pasture\n(B) midway\n(C) ski lodge\n(D) front yard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.703125, "loss": 4.373965263366699 }, { "id": "identity_118091", "image": "VQA2.0/images/579186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a ceiling light fixture present in the room?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.953125, "loss": 3.2433547973632812 }, { "id": "identity_222523", "image": "Super-CLEVR/images/superCLEVR_new_000883.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the larger minivan?" }, { "from": "gpt", "value": "The answer is Teal" } ], "clip_score": 27.140625, "loss": 3.4351301193237305 }, { "id": "identity_298782", "image": "TabMWP/images/31918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on Hansen's lunch expenses recorded in the table, what was the difference in spending between Thursday and Friday?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 37.5625, "loss": 3.6897506713867188 }, { "id": "identity_169147", "image": "IconQA/images/13605.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many centimeters are there between the 2 cm and 4 cm marks on the ruler?" }, { "from": "gpt", "value": "The answer is 2 centimeters" } ], "clip_score": 31.65625, "loss": 1.3508363962173462 }, { "id": "identity_46303", "image": "PlotQA/images/90907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Afghanistan have the lowest percentage of children who received DPT vaccinations?" }, { "from": "gpt", "value": "The answer is 2001" } ], "clip_score": 31.640625, "loss": 1.8135762214660645 }, { "id": "identity_287853", "image": "UniGeo/images/1907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, AD is the median of triangle ABC, AB = 5, AC = 3. The difference between the perimeter of triangle ABD and the perimeter of triangle ACD is ()\nChoices:\n(A) 6\n(B) 3\n(C) 2\n(D) Uncertain" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.8125, "loss": 2.1030192375183105 }, { "id": "identity_164661", "image": "UniGeo/images/1694.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that CD is the diameter of circle O and chord DE is parallel to radius OA, find the length of chord DE if the radius of the circle is 5 units" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 32.75, "loss": 2.123936653137207 }, { "id": "identity_151209", "image": "ScienceQA/images/7027.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which of these states is located directly east of Nevada?" }, { "from": "gpt", "value": "The answer is Utah" } ], "clip_score": 29.078125, "loss": 2.728334426879883 }, { "id": "identity_266265", "image": "TextVQA/images/be2ff261f83dae4d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By whom was this beer made?" }, { "from": "gpt", "value": "The answer is new blegium" } ], "clip_score": 21.671875, "loss": 3.3580384254455566 }, { "id": "identity_162726", "image": "GeoQA+/images/187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If arc BC equals arc CD equals arc DE and angle COD is 35 degrees, what is the measure of angle BOD?" }, { "from": "gpt", "value": "The answer is 105 degrees" } ], "clip_score": 32.96875, "loss": 1.7405495643615723 }, { "id": "identity_300823", "image": "UniGeo/images/485.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,已知某圆锥轴截面等腰三角形的底边和高线长均为10cm,则这个圆锥的侧面积为多少?\n\n选项:\n(A) 50√5πcm^{2}\n(B) 50πcm^{2}\n(C) 25πcm^{2}\n(D) 25√5πcm^{2}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.453125, "loss": 2.2625718116760254 }, { "id": "identity_4752", "image": "TabMWP/images/18557.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At a swim meet, Barbara noted the size of various swim teams. How many teams have at least 40 swimmers but fewer than 64 swimmers?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 26.953125, "loss": 2.3557794094085693 }, { "id": "identity_276209", "image": "IconQA/images/89559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate length of the line in centimeters when measured to the nearest centimeter using the ruler?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.65625, "loss": 2.9267921447753906 }, { "id": "identity_198259", "image": "VQA2.0/images/31948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people visible on the platform?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 16.515625, "loss": 3.2062888145446777 }, { "id": "identity_161529", "image": "GeoQA+/images/4039.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle 1 is 62 degrees, what is the measure of angle AFD?" }, { "from": "gpt", "value": "The answer is 124 degrees" } ], "clip_score": 32.09375, "loss": 1.9126620292663574 }, { "id": "identity_51460", "image": "DVQA/images/bar_train_00181685.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of values represented by the green bars with dots?" }, { "from": "gpt", "value": "The answer is 21" } ], "clip_score": 21.15625, "loss": 2.385437250137329 }, { "id": "identity_291036", "image": "ScienceQA/images/1352.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Among the states listed, which state is located the farthest south? (A) Wisconsin, (B) North Dakota, (C) Arizona, or (D) Ohio." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.609375, "loss": 3.684779644012451 }, { "id": "identity_25636", "image": "TextVQA/images/4c1585f044724f7e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the name of book by epictetus?" }, { "from": "gpt", "value": "The answer is manual for living" } ], "clip_score": 27.671875, "loss": 2.727736711502075 }, { "id": "identity_187603", "image": "PMC-VQA/images/PMC5152543_F0001_104355.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the dark area in the middle of the brain scan?" }, { "from": "gpt", "value": "The answer is A thin vertical line" } ], "clip_score": 31.734375, "loss": 4.585653305053711 }, { "id": "identity_208211", "image": "PMC-VQA/images/PMC6463157_jcm-08-00316-f003_465228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color in the central region of the brain image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 32.53125, "loss": 3.513023853302002 }, { "id": "identity_168316", "image": "TabMWP/images/13184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of requests for Rob to the requests for Shane?" }, { "from": "gpt", "value": "The answer is 27:13 (simplified from 270:130)" } ], "clip_score": 28.0, "loss": 1.8033803701400757 }, { "id": "identity_301273", "image": "Geometry3K/images/392.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The diameters of three circles are 8 inches, 18 inches, and 11 inches, respectively. Find F B.\nChoices:\n(A) 4\n(B) 5\n(C) 5.5\n(D) 11" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.25, "loss": 2.1402697563171387 }, { "id": "identity_20822", "image": "TabMWP/images/4757.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: An insurance company compiled the number of doctor visits made by some patients last year. How many patients went to the doctor at least 1 time?" }, { "from": "gpt", "value": "The answer is 44" } ], "clip_score": 30.78125, "loss": 2.5038111209869385 }, { "id": "identity_35477", "image": "Geometry3K/images/859.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x so that the quadrilateral is a parallelogram.\nChoices:\n(A) 6\n(B) 12\n(C) 24\n(D) 30" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.34375, "loss": 2.7305831909179688 }, { "id": "identity_151718", "image": "ScienceQA/images/9170.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the offspring will have red eyes if E represents red eyes and e represents white eyes in fruit flies?" }, { "from": "gpt", "value": "The answer is 100%" } ], "clip_score": 26.28125, "loss": 1.1499121189117432 }, { "id": "identity_257848", "image": "UniGeo/images/909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知∠AOB的度数为100°,且∠ACB的度数为130°,则∠OAC的度数为多少?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 32.6875, "loss": 1.7676026821136475 }, { "id": "identity_241501", "image": "FigureQA/images/75983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total area under the curve for each color and determine the difference between the area under the Violet Red curve and the area under the Blue curve" }, { "from": "gpt", "value": "The answer is 195" } ], "clip_score": 27.0625, "loss": 3.1026153564453125 }, { "id": "identity_284072", "image": "CLEVR-Math/images/CLEVR_train_068337.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you add 2 big metal balls, how many big metal balls remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.109375, "loss": 2.2952985763549805 }, { "id": "identity_258368", "image": "TabMWP/images/12695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the total surplus or shortage if the price changes from $365 to $525" }, { "from": "gpt", "value": "The answer is 17,000" } ], "clip_score": 24.109375, "loss": 1.912319302558899 }, { "id": "identity_171156", "image": "IconQA/images/95035.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the width to the height of the original letter 'R' in the grid?" }, { "from": "gpt", "value": "The answer is 3:5" } ], "clip_score": 29.015625, "loss": 1.7410327196121216 }, { "id": "identity_219313", "image": "A-OKVQA/images/24980.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signals visible in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 15.2109375, "loss": 3.1282589435577393 }, { "id": "identity_89055", "image": "GeoQA+/images/2563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the triangle formed by the centers of the three sectors?" }, { "from": "gpt", "value": "The answer is sqrt(3)" } ], "clip_score": 30.625, "loss": 1.825764775276184 }, { "id": "identity_104457", "image": "A-OKVQA/images/489961.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What direction does the arrow on the sign point to?" }, { "from": "gpt", "value": "The answer is Left" } ], "clip_score": 23.0, "loss": 3.1554365158081055 }, { "id": "identity_196423", "image": "TextVQA/images/293f507bd888d00a.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many smaller clock faces are there around the main clock face?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 22.484375, "loss": 4.302156448364258 }, { "id": "identity_277097", "image": "ChartQA/images/9596.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the variance in the preferred distribution between Democrats and Independents?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 15.09375, "loss": 2.6821627616882324 }, { "id": "identity_187993", "image": "PMC-VQA/images/PMC5585196_F2_211657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the opacity level of the brain in the image?" }, { "from": "gpt", "value": "The answer is Semi-transparent" } ], "clip_score": 29.609375, "loss": 2.537594795227051 }, { "id": "identity_67747", "image": "TQA/images/abc_question_images/parts_worm_17315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'F' indicate on the worm diagram?" }, { "from": "gpt", "value": "The answer is Clitellum" } ], "clip_score": 25.015625, "loss": 3.9333345890045166 }, { "id": "identity_102899", "image": "TabMWP/images/12836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of billboards per city?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 17.5625, "loss": 2.867957353591919 }, { "id": "identity_130388", "image": "A-OKVQA/images/422157.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 25.265625, "loss": 2.609619617462158 }, { "id": "identity_217216", "image": "A-OKVQA/images/267042.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tagline written on the laptop screen?" }, { "from": "gpt", "value": "The answer is A REFLECTION OF YOU" } ], "clip_score": 18.40625, "loss": 1.8811613321304321 }, { "id": "identity_258887", "image": "MapQA/images/map_5614.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of revenues for all states in the 524-1460 range and the 1541-1853 range combined?" }, { "from": "gpt", "value": "The answer is 14979 to 24651" } ], "clip_score": 34.40625, "loss": 2.509049415588379 }, { "id": "identity_87334", "image": "UniGeo/images/3326.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AO is parallel to DC and ∠AOD = 70°, what is the measure of ∠ABC?" }, { "from": "gpt", "value": "The answer is 150°" } ], "clip_score": 31.6875, "loss": 1.7590793371200562 }, { "id": "identity_134866", "image": "DVQA/images/bar_train_00165888.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of people preferring bulk to those preferring faith?" }, { "from": "gpt", "value": "The answer is 0.5" } ], "clip_score": 29.890625, "loss": 2.232123374938965 }, { "id": "identity_201587", "image": "VQA2.0/images/330516.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is It is sunny and clear" } ], "clip_score": 13.3671875, "loss": 2.854245662689209 }, { "id": "identity_219894", "image": "TextVQA/images/01b1c631df04947b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the eyeshadow in the bottom right image?" }, { "from": "gpt", "value": "The answer is The eyeshadow is pink" } ], "clip_score": 21.421875, "loss": 2.623737096786499 }, { "id": "identity_319644", "image": "IconQA/images/7307.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 19.390625, "loss": 2.2698357105255127 }, { "id": "identity_57034", "image": "DVQA/images/bar_train_00051294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in the number of people who prefer 'charm' between the 'tire' and 'beat' categories?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.75, "loss": 2.4417953491210938 }, { "id": "identity_111034", "image": "Super-CLEVR/images/superCLEVR_new_000540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many wheels are visible on the purple metallic bicycle?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.875, "loss": 2.0072977542877197 }, { "id": "identity_298924", "image": "TextVQA/images/10b03c7f215f04e8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main focus according to the poster?" }, { "from": "gpt", "value": "The answer is mathmatics" } ], "clip_score": 17.234375, "loss": 4.466200828552246 }, { "id": "identity_256734", "image": "A-OKVQA/images/456886.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different motorcycle brands can be seen advertised in the image, and what are they?" }, { "from": "gpt", "value": "The answer is Two brands: Harley-Davidson and Outlaw" } ], "clip_score": 23.0625, "loss": 2.1997599601745605 }, { "id": "identity_16348", "image": "TabMWP/images/18848.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To determine whether people make fewer trips to the movies when the economy is bad, a reporter asked people how many movies they saw last month. How many people went to the movies more than 1 time?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 38.78125, "loss": 2.033376455307007 }, { "id": "identity_260917", "image": "AI2D/images/4438.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Compare and contrast the structural differences between heterocercal, homocercal, and diphycercal tails, and explain how these differences relate to the swimming styles and habitats of the fish that possess them" }, { "from": "gpt", "value": "The answer is Heterocercal provides lift, homocercal provides thrust, diphycercal undulates" } ], "clip_score": 31.90625, "loss": 2.260023832321167 }, { "id": "identity_10557", "image": "TQA/images/question_images/human_system_ear_6098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the malleus connected to?\nChoices:\n(A) staples\n(B) semicircular canals\n(C) cochlea\n(D) incus" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.703125, "loss": 4.1214165687561035 }, { "id": "identity_334994", "image": "TextVQA/images/02a657b3b00696a0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How is this character described?" }, { "from": "gpt", "value": "The answer is bad company" } ], "clip_score": 12.8515625, "loss": 4.989513397216797 }, { "id": "identity_56583", "image": "MapQA/images/map_5006.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range of owners in Texas?" }, { "from": "gpt", "value": "The answer is 5.1%-7.0%" } ], "clip_score": 29.90625, "loss": 2.026628255844116 }, { "id": "identity_328441", "image": "TabMWP/images/37704.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is it linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.90625, "loss": 4.279753684997559 }, { "id": "identity_122825", "image": "PMC-VQA/images/PMC3281964_pone-0031618-g001_126322.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate size of the ant compared to the scale bar in the image?" }, { "from": "gpt", "value": "The answer is The ant is approximately half the length of the scale bar" } ], "clip_score": 27.34375, "loss": 2.3398001194000244 }, { "id": "identity_110061", "image": "A-OKVQA/images/530546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of animals are visible in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Dogs" } ], "clip_score": 24.96875, "loss": 2.273620367050171 }, { "id": "identity_205748", "image": "VQA2.0/images/77928.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the horse's halter?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 27.265625, "loss": 3.8160934448242188 }, { "id": "identity_320218", "image": "PMC-VQA/images/PMC7160729_F2_152645.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of images are shown here?\nChoices:\n(A) X-ray images \n(B) MRI images \n(C) CT scan images \n(D) Ultrasound images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.875, "loss": 4.527724266052246 }, { "id": "identity_145972", "image": "DocVQA/images/yxbx0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the President and Chief Executive Officer of The Great Western Sugar Company?" }, { "from": "gpt", "value": "The answer is Jack B. Powell" } ], "clip_score": 19.078125, "loss": 1.6391031742095947 }, { "id": "identity_16238", "image": "PlotQA/images/110127.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between the amount spent on travel items of national tourists in Guinea and that in Hungary ?" }, { "from": "gpt", "value": "The answer is 6080000000.0" } ], "clip_score": 26.375, "loss": 1.9840898513793945 }, { "id": "identity_263927", "image": "IconQA/images/12738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is it possible to draw a square using this shape?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.359375, "loss": 4.045055389404297 }, { "id": "identity_168188", "image": "IconQA/images/94874.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If two marbles are removed, how many blue stripes will be left on the remaining marbles?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.359375, "loss": 2.264498472213745 }, { "id": "identity_23968", "image": "PMC-VQA/images/PMC7523872_F1_262817.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where are the lymph nodes located? \nChoices:\n(A) Brain \n(B) Legs \n(C) Neck \n(D) Groin " }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 18.59375, "loss": 3.8626747131347656 }, { "id": "identity_47254", "image": "FigureQA/images/39501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Dark Orange occupy more than 50% of the pie chart?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 39.4375, "loss": 3.1516292095184326 }, { "id": "identity_218968", "image": "TextVQA/images/04ec26c906c3a2a5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the alphanumeric code printed on the label below the color chart?" }, { "from": "gpt", "value": "The answer is X001828120" } ], "clip_score": 19.8125, "loss": 2.2420196533203125 }, { "id": "identity_118619", "image": "VQA2.0/images/353671.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of plate is the dessert served on?" }, { "from": "gpt", "value": "The answer is A white plate with a green pattern" } ], "clip_score": 25.96875, "loss": 2.9864792823791504 }, { "id": "identity_165438", "image": "UniGeo/images/3455.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 1/2 * 9 = 4.5" } ], "clip_score": 28.796875, "loss": 1.9157886505126953 }, { "id": "identity_171969", "image": "CLEVR-Math/images/CLEVR_train_065561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many blue objects are there?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.078125, "loss": 2.3508434295654297 }, { "id": "identity_146607", "image": "ChartQA/images/12217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of voters supporting the Democratic candidate in June 2006?" }, { "from": "gpt", "value": "The answer is 68%" } ], "clip_score": 33.375, "loss": 2.1693167686462402 }, { "id": "identity_186457", "image": "PMC-VQA/images/PMC9417064_FIG6_386919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the sinuses be identified in this image?" }, { "from": "gpt", "value": "The answer is Yes, sinuses can be identified" } ], "clip_score": 27.984375, "loss": 2.531992197036743 }, { "id": "identity_27506", "image": "TabMWP/images/34125.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The local high school found out how many members each club had. What is the mode of the numbers?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.6875, "loss": 2.4016008377075195 }, { "id": "identity_229624", "image": "PMC-VQA/images/PMC5880262_MCS002444MILF1_296221.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many recurrences occurred in the same region as the initial mass, and in which region did the final recurrence occur?" }, { "from": "gpt", "value": "The answer is Three recurrences, left parietal region" } ], "clip_score": 24.796875, "loss": 3.526106119155884 }, { "id": "identity_43679", "image": "DVQA/images/bar_train_00112948.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of \"bird\" in \"note\" greater than the value of \"bird\" in \"merit\"?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 21.96875, "loss": 3.3579750061035156 }, { "id": "identity_13614", "image": "DVQA/images/bar_train_00127369.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.15625, "loss": 3.4628233909606934 }, { "id": "identity_54641", "image": "DVQA/images/bar_train_00161229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group has the highest value for the 'reads' category?" }, { "from": "gpt", "value": "The answer is The 'milk' group has the highest value for 'reads'" } ], "clip_score": 18.0, "loss": 1.7632769346237183 }, { "id": "identity_234898", "image": "GeoQA+/images/2314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在Rt△ABC中,∠BCA=90°,△PAB中AB边上的高等于AB的长度,△QBC中BC边上的高等于BC的长度,△HAC中AC边上的高等于AC的长度,且△PAB,△QBC的面积分别是10和8,求△ABC的面积。" }, { "from": "gpt", "value": "The answer is \\( 4\\sqrt{5} \\)" } ], "clip_score": 32.25, "loss": 2.608051061630249 }, { "id": "identity_154081", "image": "TQA/images/question_images/human_system_digestive_1393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the organ that produces bile to aid in digestion?" }, { "from": "gpt", "value": "The answer is Gallbladder" } ], "clip_score": 29.984375, "loss": 1.7831664085388184 }, { "id": "identity_24640", "image": "Geometry3K/images/1295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If W P is a perpendicular bisector, m \\angle W H A = 8 q + 17, m \\angle H W P = 10 + q, A P = 6 r + 4, and P H = 22 + 3 r, find r.\nChoices:\n(A) 6\n(B) 7\n(C) 8\n(D) 9" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.1875, "loss": 2.386991500854492 }, { "id": "identity_120411", "image": "A-OKVQA/images/386581.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the horse's mane?" }, { "from": "gpt", "value": "The answer is Blonde" } ], "clip_score": 22.078125, "loss": 3.3650410175323486 }, { "id": "identity_322891", "image": "DocVQA/images/phfg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the professor of Medical and Hospital Administration?" }, { "from": "gpt", "value": "The answer is Cecil G. Sheps, M.D., M.P.H." } ], "clip_score": 20.34375, "loss": 1.356921911239624 }, { "id": "identity_51150", "image": "DVQA/images/bar_train_00001183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the least variation in accuracy?" }, { "from": "gpt", "value": "The answer is reply" } ], "clip_score": 31.40625, "loss": 3.730443239212036 }, { "id": "identity_34570", "image": "ScienceQA/images/10117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which solution has a higher concentration of pink particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution A\n(C) Solution B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.125, "loss": 3.2119526863098145 }, { "id": "identity_239839", "image": "PMC-VQA/images/PMC4369826_Fig2_370506.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the characteristics of the predominantly solid tumor seen in the ultrasonography, including its echogenicity, shape, and any associated features?" }, { "from": "gpt", "value": "The answer is Hypoechoic, irregular shape, may have calcifications or vascularity" } ], "clip_score": 34.84375, "loss": 2.677370309829712 }, { "id": "identity_173328", "image": "TabMWP/images/23353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of times macaroni and cheese was eaten last month?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 34.0, "loss": 2.576242446899414 }, { "id": "identity_101169", "image": "IconQA/images/47152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of smaller shapes to larger shapes?" }, { "from": "gpt", "value": "The answer is 1:1" } ], "clip_score": 27.34375, "loss": 1.677964687347412 }, { "id": "identity_336854", "image": "PMC-VQA/images/PMC4921494_F2_45002.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the thickness of these cross-sections?\nChoices:\n(A) 1 μm \n(B) 5 μm \n(C) 10 μm \n(D) 15 μm" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.359375, "loss": 4.04038143157959 }, { "id": "identity_73810", "image": "ScienceQA/images/7248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many muffins are visible in the image?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 23.71875, "loss": 2.349026918411255 }, { "id": "identity_160633", "image": "UniGeo/images/481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle 3?" }, { "from": "gpt", "value": "The answer is 42 degrees" } ], "clip_score": 31.828125, "loss": 2.2912240028381348 }, { "id": "identity_206313", "image": "A-OKVQA/images/402281.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What season is depicted in the image?" }, { "from": "gpt", "value": "The answer is Either spring or autumn" } ], "clip_score": 16.515625, "loss": 3.658006429672241 }, { "id": "identity_276954", "image": "FigureQA/images/67815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Red the highest in value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.296875, "loss": 4.599837303161621 }, { "id": "identity_107735", "image": "PMC-VQA/images/PMC3795743_pone-0076645-g003_237342.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object in the bottom left corner?" }, { "from": "gpt", "value": "The answer is Irregular and roundish" } ], "clip_score": 18.09375, "loss": 4.278596878051758 }, { "id": "identity_225327", "image": "A-OKVQA/images/196510.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you were riding one of these bicycles and needed to stop quickly, what sequence of actions would you take to ensure a safe and controlled stop?" }, { "from": "gpt", "value": "The answer is Gradually squeeze brake handles, shift weight back, apply consistent pressure" } ], "clip_score": 23.125, "loss": 3.2118306159973145 }, { "id": "identity_298602", "image": "GeoQA+/images/5086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and C are on circle O, with angle BOC measuring 70°. What is the degree measure of angle A?\nChoices:\n(A) 35°\n(B) 45°\n(C) 40°\n(D) 70°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.28125, "loss": 2.7239890098571777 }, { "id": "identity_305383", "image": "VQA2.0/images/424470.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this appear to be a photo taken from below?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 17.390625, "loss": 3.329418897628784 }, { "id": "identity_244627", "image": "PlotQA/images/117184.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total population percentage for all countries in 1960?" }, { "from": "gpt", "value": "The answer is 145%" } ], "clip_score": 29.90625, "loss": 2.2398500442504883 }, { "id": "identity_51821", "image": "FigureQA/images/43146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color represents the category with the smaller section?" }, { "from": "gpt", "value": "The answer is Bubblegum" } ], "clip_score": 34.1875, "loss": 2.3169054985046387 }, { "id": "identity_134598", "image": "DVQA/images/bar_train_00164102.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who preferred the temple across both categories?" }, { "from": "gpt", "value": "The answer is 16" } ], "clip_score": 27.625, "loss": 2.430013418197632 }, { "id": "identity_179738", "image": "TabMWP/images/21568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many students belong to more than 4 clubs?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 22.34375, "loss": 2.341449022293091 }, { "id": "identity_139781", "image": "DocVQA/images/qrfk0226_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the volume number of the journal where the first publication was published?" }, { "from": "gpt", "value": "The answer is 237" } ], "clip_score": 17.015625, "loss": 2.0488269329071045 }, { "id": "identity_74391", "image": "AI2D/images/2344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What structure is labeled at the beginning of the life cycle?" }, { "from": "gpt", "value": "The answer is Mature sporophyte" } ], "clip_score": 27.140625, "loss": 2.2905924320220947 }, { "id": "identity_8068", "image": "DocVQA/images/qpvw0217_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the article?" }, { "from": "gpt", "value": "The answer is Is There a Connection Between Estrogen and Parkinson's Disease?" } ], "clip_score": 19.890625, "loss": 2.2381575107574463 }, { "id": "identity_289630", "image": "VQA2.0/images/7746.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of different types of cake that are on the table?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.6875, "loss": 2.10886549949646 }, { "id": "identity_25409", "image": "DocVQA/images/jlbx0227_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the value of other trade payables and accruals in the current liabilities ?" }, { "from": "gpt", "value": "The answer is 1,755" } ], "clip_score": 22.09375, "loss": 2.6621627807617188 }, { "id": "identity_176810", "image": "CLEVR-Math/images/CLEVR_train_010317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of cubes to cylinders?" }, { "from": "gpt", "value": "The answer is 2:1" } ], "clip_score": 24.828125, "loss": 1.300234079360962 }, { "id": "identity_30814", "image": "CLEVR-Math/images/CLEVR_train_042303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all spheres. How many objects are left?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 24.59375, "loss": 2.1033875942230225 }, { "id": "identity_193968", "image": "PMC-VQA/images/PMC5763060_fig3_262113.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see any fractures in the bones visible in this image?" }, { "from": "gpt", "value": "The answer is No, there are no visible fractures" } ], "clip_score": 34.125, "loss": 1.9200465679168701 }, { "id": "identity_175733", "image": "IconQA/images/48258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the shape is cut into two equal parts, what would be the area of each part?" }, { "from": "gpt", "value": "The answer is 2 unit squares" } ], "clip_score": 27.125, "loss": 3.250991106033325 }, { "id": "identity_214206", "image": "PMC-VQA/images/PMC9088844_F1_276123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which side of the patient's body is marked with a letter in this image?" }, { "from": "gpt", "value": "The answer is Left side" } ], "clip_score": 30.09375, "loss": 4.536774158477783 }, { "id": "identity_12150", "image": "Geometry3K/images/47.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the perimeter of trapezoid P Q R S.\nChoices:\n(A) 38\n(B) 38 + 6 \\sqrt { 2 }\n(C) 38 + 6 \\sqrt { 3 }\n(D) 38 + 6 \\sqrt { 2 } + 6 \\sqrt { 3 }" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.6875, "loss": 2.49099063873291 }, { "id": "identity_214948", "image": "PMC-VQA/images/PMC6737727_F3_36291.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the slab-like signal located in the gray matter or white matter of the brain?" }, { "from": "gpt", "value": "The answer is White matter" } ], "clip_score": 36.71875, "loss": 3.3778464794158936 }, { "id": "identity_17023", "image": "Super-CLEVR/images/superCLEVR_new_001036.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more motorbikes that are in front of the cyan rubber cruiser than tiny yellow trucks?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 24.5625, "loss": 5.048224925994873 }, { "id": "identity_130687", "image": "VizWiz/images/VizWiz_train_00001865.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the 'Accept' button in the image?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 23.46875, "loss": 2.6504387855529785 }, { "id": "identity_120891", "image": "TextVQA/images/5e3434e835b3fb35.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is taking place in the bottom left image?" }, { "from": "gpt", "value": "The answer is Picketing or protesting" } ], "clip_score": 18.875, "loss": 2.472484588623047 }, { "id": "identity_160947", "image": "GeoQA+/images/639.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在Rt△ABC中,CD是斜边AB上的高,若∠BCD=35°,求CD的长度为()假设AC=5, BC=3" }, { "from": "gpt", "value": "The answer is tan(35°)*3" } ], "clip_score": 30.109375, "loss": 2.987224817276001 }, { "id": "identity_35234", "image": "GeoQA+/images/1552.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,A,B,C是⊙O上的三个点,∠AOB=60°,∠B=55°,则∠A的度数是()\nChoices:\n(A) 25°\n(B) 30°\n(C) 40°\n(D) 50°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.375, "loss": 2.5509629249572754 }, { "id": "identity_194698", "image": "PMC-VQA/images/PMC5659859_FI170328cr-2_232191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there evidence of a pneumothorax on the left side of the chest?" }, { "from": "gpt", "value": "The answer is No, there is no visible pneumothorax" } ], "clip_score": 27.921875, "loss": 1.850623369216919 }, { "id": "identity_217046", "image": "VQA-AS/images/8598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paintings are visible on the wall?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 23.03125, "loss": 4.460611343383789 }, { "id": "identity_282344", "image": "GeoQA+/images/560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that triangle ABC is congruent to triangle DEF, angle A is 80°, and angle B is 60°, what is the measure of angle F? \nChoices:\n(A) 30°\n(B) 40°\n(C) 70°\n(D) 80°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.0, "loss": 2.5723214149475098 }, { "id": "identity_137302", "image": "ChartQA/images/two_col_41634.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the combined percentage of NKT Group and Nexans?" }, { "from": "gpt", "value": "The answer is 66%" } ], "clip_score": 28.015625, "loss": 2.5470175743103027 }, { "id": "identity_249660", "image": "DocVQA/images/fxhm0052_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the Digital Dilution Instrument (DDI) in the context of cigarette testing, and how does it contribute to the measurement process?" }, { "from": "gpt", "value": "The answer is Measures average filter tip ventilation of 15 cigarettes in percent" } ], "clip_score": 19.8125, "loss": 3.707798957824707 }, { "id": "identity_50128", "image": "ChartQA/images/7017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of 'Should' opinion across all countries listed?" }, { "from": "gpt", "value": "The answer is 52.25%" } ], "clip_score": 23.9375, "loss": 1.6673697233200073 }, { "id": "identity_84884", "image": "GeoQA+/images/4329.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在△ABC中,∠C=36°,点D是△ABC的内部一点,且∠CBD=∠CDB,则∠CBD的度数是多少?" }, { "from": "gpt", "value": "The answer is 72°" } ], "clip_score": 26.25, "loss": 1.9494236707687378 }, { "id": "identity_194232", "image": "PMC-VQA/images/PMC5217944_pone.0169462.g002_111194.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bright area in the center of the image likely indicating?" }, { "from": "gpt", "value": "The answer is Likely a tumor or pathology" } ], "clip_score": 30.671875, "loss": 3.277054786682129 }, { "id": "identity_253292", "image": "A-OKVQA/images/533799.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What specific features of the yellow truck indicate that it is an emergency vehicle, and how do these features justify the other cars letting it go?" }, { "from": "gpt", "value": "The answer is Flashing lights, sirens, emergency service markings" } ], "clip_score": 27.875, "loss": 3.094029188156128 }, { "id": "identity_217283", "image": "PMC-VQA/images/PMC5243825_F4_115824.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the highlighted area located in the cerebral cortex?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.90625, "loss": 3.410121440887451 }, { "id": "identity_275629", "image": "TabMWP/images/25261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the most frequently occurring number of states visited by students in Elizabeth's class?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 32.875, "loss": 2.1634063720703125 }, { "id": "identity_179454", "image": "IconQA/images/100327.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the paper clips if each paper clip is 2 centimeters long?" }, { "from": "gpt", "value": "The answer is 8 centimeters" } ], "clip_score": 29.125, "loss": 1.632886290550232 }, { "id": "identity_198551", "image": "PMC-VQA/images/PMC6826500_brainsci-09-00262-f020_60459.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What structure is labeled at the bottom center of the image?" }, { "from": "gpt", "value": "The answer is NVU (Neurovascular Unit)" } ], "clip_score": 31.328125, "loss": 1.8371360301971436 }, { "id": "identity_144475", "image": "DocVQA/images/gqxw0023_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pages are included in this transmission report?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.578125, "loss": 2.186396360397339 }, { "id": "identity_191810", "image": "PMC-VQA/images/PMC6820998_Fig1_58533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many distinct green fluorescent spots can be counted in the image?" }, { "from": "gpt", "value": "The answer is an integer number" } ], "clip_score": 28.015625, "loss": 3.698354482650757 }, { "id": "identity_179665", "image": "TabMWP/images/34084.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between the most expensive bead and the least expensive bead?" }, { "from": "gpt", "value": "The answer is $0.45" } ], "clip_score": 34.28125, "loss": 1.9336000680923462 }, { "id": "identity_163634", "image": "GeoQA+/images/5649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that S1 represents the area of the square on side AC of right triangle ABC, and S2 represents the area of the square on side AB, if S1 = 3 and S2 = 11, what is the length of side AC of triangle ABC?" }, { "from": "gpt", "value": "The answer is √3" } ], "clip_score": 27.125, "loss": 2.5315322875976562 }, { "id": "identity_86957", "image": "GeoQA+/images/3509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the rhombus ABCD, with ∠A = 60°, after folding point C onto line DP, what is the ratio of the area of triangle C'DE to the area of the original rhombus ABCD?" }, { "from": "gpt", "value": "The answer is 1:2" } ], "clip_score": 35.34375, "loss": 1.2576663494110107 }, { "id": "identity_47080", "image": "FigureQA/images/35047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are used to represent different segments in the pie chart?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 36.09375, "loss": 2.3644278049468994 }, { "id": "identity_79758", "image": "UniGeo/images/3464.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ADB in the given circle diagram?" }, { "from": "gpt", "value": "The answer is 160 degrees" } ], "clip_score": 34.75, "loss": 2.1867873668670654 }, { "id": "identity_218734", "image": "VQA2.0/images/28103.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tennis ball?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 22.78125, "loss": 2.628338575363159 }, { "id": "identity_310612", "image": "TextVQA/images/24b5125dfe480ff8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sign on this?" }, { "from": "gpt", "value": "The answer is menu" } ], "clip_score": 22.21875, "loss": 3.3869471549987793 }, { "id": "identity_80206", "image": "UniGeo/images/3189.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If DE is parallel to BC and AC/AD = 1/2, what is the perimeter of triangle ADE if AC = 4 cm and AD = 6 cm?" }, { "from": "gpt", "value": "The answer is 11 cm" } ], "clip_score": 34.21875, "loss": 2.1577298641204834 }, { "id": "identity_27824", "image": "FigureQA/images/9671.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Navy Blue greater than Lime Green?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.546875, "loss": 3.635333299636841 }, { "id": "identity_274847", "image": "Geometry3K/images/1529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle 1.\nOptions:\n(A) 42\n(B) 72\n(C) 108\n(D) 120" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.953125, "loss": 3.302699327468872 }, { "id": "identity_15866", "image": "DocVQA/images/stkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the responsible investigator?" }, { "from": "gpt", "value": "The answer is robert M. Kark, M.D." } ], "clip_score": 21.328125, "loss": 3.1234893798828125 }, { "id": "identity_244217", "image": "ScienceQA/images/16809.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the highlighted country and list its neighboring countries shown on the map" }, { "from": "gpt", "value": "The answer is Cuba, Haiti" } ], "clip_score": 34.46875, "loss": 3.241879940032959 }, { "id": "identity_126779", "image": "VQA2.0/images/540471.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of headgear is the skateboarder wearing?" }, { "from": "gpt", "value": "The answer is Full-face helmet" } ], "clip_score": 24.84375, "loss": 2.9359781742095947 }, { "id": "identity_67781", "image": "ScienceQA/images/8402.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many states border the highlighted state?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 28.5, "loss": 2.230405569076538 }, { "id": "identity_260326", "image": "PMC-VQA/images/PMC5599687_Fig6_215749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the regions in the forebrain that show immunoreactivity and describe their relative positions to each other" }, { "from": "gpt", "value": "The answer is Arachnoid barrier cell layer near dorsal surface and superior sagittal sinus" } ], "clip_score": 30.03125, "loss": 3.2352094650268555 }, { "id": "identity_96390", "image": "TabMWP/images/32168.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of miles biked by all members last week?" }, { "from": "gpt", "value": "The answer is 581" } ], "clip_score": 23.625, "loss": 2.466251850128174 }, { "id": "identity_315354", "image": "PMC-VQA/images/PMC7189031_os12665-fig-0004_160452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What surgical procedure is depicted in this radiographic image?\nChoices:\n(A) Hip dislocation surgery \n(B) Arthroscopy \n(C) Transverse osteotomy combined with total hip arthroplasty (THA) \n(D) Craniotomy." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.359375, "loss": 3.778671979904175 }, { "id": "identity_283961", "image": "MapQA/images/map_2384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the first symbol in the legend indicative of the smallest category?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 11.046875, "loss": 3.3504414558410645 }, { "id": "identity_82237", "image": "Geometry3K/images/1955.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the two polygons are similar, what is the value of x?" }, { "from": "gpt", "value": "The answer is x = 24" } ], "clip_score": 28.984375, "loss": 2.128185749053955 }, { "id": "identity_15065", "image": "UniGeo/images/1954.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,C、D是线段AB上的两点,且D是线段AC的中点.若AB=10cm,BC=4cm,则AD的长为()\nChoices:\n(A) 2cm\n(B) 3cm\n(C) 4cm\n(D) 6cm" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.171875, "loss": 2.499269962310791 }, { "id": "identity_196464", "image": "PMC-VQA/images/PMC6747042_f1_38423.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the scale bar at the bottom right of the image indicating?" }, { "from": "gpt", "value": "The answer is Reference for size measurement" } ], "clip_score": 21.5625, "loss": 4.509439945220947 }, { "id": "identity_203741", "image": "A-OKVQA/images/357943.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the grass?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 21.515625, "loss": 2.324472427368164 }, { "id": "identity_313114", "image": "PlotQA/images/70901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value for insurance and financial services across these countries?" }, { "from": "gpt", "value": "The answer is 15.6133054258583" } ], "clip_score": 26.640625, "loss": 2.6760101318359375 }, { "id": "identity_238391", "image": "UniGeo/images/1691.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB为⊙O的直径,C为⊙O外一点,过点C作⊙O的切线,切点为B,连接AC交⊙O于点D,∠C=40°,点E在AB左侧的半圆上运动(不与A、B重合),若点E在⊙O上运动时,求∠AED的最大值。" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 26.234375, "loss": 1.7437924146652222 }, { "id": "identity_152618", "image": "TQA/images/question_images/ozone_formation_9202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many separate steps are illustrated in the natural ozone production process?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 35.125, "loss": 2.285600423812866 }, { "id": "identity_225281", "image": "DVQA/images/bar_train_00089232.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for items that sold more than 5 units in all stores combined?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 28.578125, "loss": 2.4173262119293213 }, { "id": "identity_249671", "image": "TextVQA/images/8fe9aaf4f3eb6afc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many mimobot USB flash drives are visible in the image, and what themes do they represent?" }, { "from": "gpt", "value": "The answer is Six mimobot USB flash drives; themes are HALO and Star Wars" } ], "clip_score": 28.359375, "loss": 2.7687795162200928 }, { "id": "identity_258411", "image": "PMC-VQA/images/PMC7686009_Fig9_304362.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which specific part of the pelvis exhibits retroversion in this x-ray, and what are the visible signs that indicate this condition?" }, { "from": "gpt", "value": "The answer is Left Acetabulum exhibits retroversion, indicated by posterior orientation" } ], "clip_score": 28.40625, "loss": 3.2001051902770996 }, { "id": "identity_138084", "image": "FigureQA/images/70492.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value of Web Green on the y-axis label?" }, { "from": "gpt", "value": "The answer is 95.7" } ], "clip_score": 29.46875, "loss": 2.6981799602508545 }, { "id": "identity_161424", "image": "GeoQA+/images/2494.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,A,C,D在圆上,∠D=65°,求∠ACB的度数为多少?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 32.125, "loss": 1.831803321838379 }, { "id": "identity_298388", "image": "AI2D/images/1858.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Create a food chain with three trophic levels from the provided food web.\nOptions:\n(A) phytoplankton-krill-penguin\n(B) krill-phytoplankton-fish\n(C) fish-phytoplankton-krill\n(D) all options are correct" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 41.1875, "loss": 2.8209891319274902 }, { "id": "identity_308436", "image": "FigureQA/images/42130.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Olive Drab the lower median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.6015625, "loss": 4.1909074783325195 }, { "id": "identity_233532", "image": "IconQA/images/101062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how many degrees has the letter been rotated, and in which direction (clockwise or counterclockwise)?" }, { "from": "gpt", "value": "The answer is 180 degrees clockwise" } ], "clip_score": 23.921875, "loss": 1.882988452911377 }, { "id": "identity_5843", "image": "GeoQA+/images/1013.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,将一对三角板如图放置,∠BAC=∠ADE=90°,∠E=45°,∠C=30°,若AE∥BC,则∠AFD=()\nChoices:\n(A) 75°\n(B) 85°\n(C) 90°\n(D) 65°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.96875, "loss": 2.6746418476104736 }, { "id": "identity_245765", "image": "PMC-VQA/images/PMC4520183_Fig2_410686.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the regions where the FLAIR image shows hyperintense lesions, and specify the regions with the highest and lowest intensity of these lesions" }, { "from": "gpt", "value": "The answer is Right temporal lobe has highest, deep white matter has lowest" } ], "clip_score": 33.03125, "loss": 3.0405161380767822 }, { "id": "identity_116295", "image": "A-OKVQA/images/40211.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a safety warning label visible on the side mirror?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.15625, "loss": 3.320575714111328 }, { "id": "identity_206453", "image": "A-OKVQA/images/430688.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are suspended on the wire?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 14.8046875, "loss": 4.531371116638184 }, { "id": "identity_338217", "image": "ScienceQA/images/2753.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Providence\n(B) Newport\n(C) Richmond\n(D) Boston" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.421875, "loss": 4.5218892097473145 }, { "id": "identity_122884", "image": "A-OKVQA/images/557045.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the stadium shown in the image?" }, { "from": "gpt", "value": "The answer is Fenway Park" } ], "clip_score": 26.0625, "loss": 1.8895825147628784 }, { "id": "identity_245617", "image": "DocVQA/images/rnww0217_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How long is the meeting scheduled to last, and what is the room number where it will be held?" }, { "from": "gpt", "value": "The answer is 1 hour and 30 minutes, Room 150B2" } ], "clip_score": 19.140625, "loss": 2.311720371246338 }, { "id": "identity_3182", "image": "TabMWP/images/19137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Some scooter companies compared how many scooters they sold. What is the median of the numbers?" }, { "from": "gpt", "value": "The answer is 56" } ], "clip_score": 39.34375, "loss": 2.564206838607788 }, { "id": "identity_75761", "image": "ScienceQA/images/10632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is highlighted in dark green on the map?" }, { "from": "gpt", "value": "The answer is Kentucky" } ], "clip_score": 33.0, "loss": 2.6155452728271484 }, { "id": "identity_7003", "image": "DVQA/images/bar_train_00005918.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are preferred by more than 9 people in at least one category?" }, { "from": "gpt", "value": "The answer is zero" } ], "clip_score": 28.71875, "loss": 3.250636339187622 }, { "id": "identity_161803", "image": "UniGeo/images/729.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of circle O is 5 units, what is the length of the arc AC?" }, { "from": "gpt", "value": "The answer is 5π/3 or approximately 5.24 units" } ], "clip_score": 32.03125, "loss": 1.5672484636306763 }, { "id": "identity_334630", "image": "TabMWP/images/18611.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A pottery factory recorded the number of broken plates per shipment last week. How many shipments had between 35 and 58 broken plates?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 38.0, "loss": 2.224968433380127 }, { "id": "identity_33661", "image": "ChartQA/images/839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average of all the bars ?" }, { "from": "gpt", "value": "The answer is 44.3" } ], "clip_score": 10.8203125, "loss": 2.198035478591919 }, { "id": "identity_82685", "image": "GeoQA+/images/5306.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the tree is to be propped back up to its original position, what angle with respect to the ground should it make?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 27.46875, "loss": 2.1376290321350098 }, { "id": "identity_38335", "image": "PMC-VQA/images/PMC2949861_F12_75279.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which direction is anterior in the images? \nChoices:\n(A) Right \n(B) Left \n(C) Up \n(D) Down " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.71875, "loss": 3.972965955734253 }, { "id": "identity_235968", "image": "A-OKVQA/images/421823.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many geese are flying in a formation if each group of geese in the formation contains 3 geese and there are 2 such groups?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 21.796875, "loss": 2.6072237491607666 }, { "id": "identity_297220", "image": "ChartQA/images/10799.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the numerical gap between the highest conservative value and the lowest liberal value?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 15.578125, "loss": 2.5433332920074463 }, { "id": "identity_65888", "image": "AI2D/images/4331.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of berry-type fruits shown in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.8125, "loss": 2.295565605163574 }, { "id": "identity_333942", "image": "PMC-VQA/images/PMC8101207_Fig2_434625.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the radiograph show about the left humerus?\nChoices:\n(A) osteoblastic destruction in the upper part of the humerus.\n(B) osteoblastic destruction in the lower part of the humerus.\n(C) osteolytic destruction in the middle part of the humerus.\n(D) osteolytic destruction in the distal part of the humerus." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.28125, "loss": 3.7901382446289062 }, { "id": "identity_65514", "image": "ScienceQA/images/15409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the top magnet is flipped upside down, what poles will be facing each other?" }, { "from": "gpt", "value": "The answer is North and North" } ], "clip_score": 24.328125, "loss": 2.780245542526245 }, { "id": "identity_156720", "image": "AI2D/images/2637.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color used to represent the Earth in this diagram?" }, { "from": "gpt", "value": "The answer is Blue and green" } ], "clip_score": 24.28125, "loss": 3.604764699935913 }, { "id": "identity_248131", "image": "DocVQA/images/gryc0227_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many members are there in the curriculum committee, including the chairman, and what percentage of the committee does the chairman represent?" }, { "from": "gpt", "value": "The answer is 12 members, 8.33%" } ], "clip_score": 21.65625, "loss": 2.159287452697754 }, { "id": "identity_124211", "image": "TextVQA/images/eb0d2e351d5c2594.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the flag being waved by the person in the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.234375, "loss": 3.1585099697113037 }, { "id": "identity_199834", "image": "PMC-VQA/images/PMC3758542_RSOB130081F2_228264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate number of cells visible in the image?" }, { "from": "gpt", "value": "The answer is 12" } ], "clip_score": 23.90625, "loss": 2.3042714595794678 }, { "id": "identity_188882", "image": "A-OKVQA/images/360665.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the buildings in the background?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 17.765625, "loss": 3.4931740760803223 }, { "id": "identity_313112", "image": "VQA-AS/images/15200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could they fall?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 20.765625, "loss": 3.246358871459961 }, { "id": "identity_185089", "image": "A-OKVQA/images/62387.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles turning left at the intersection?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 22.921875, "loss": 3.054641008377075 }, { "id": "identity_54321", "image": "ChartQA/images/10249312004999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the source of the data presented in the graph?" }, { "from": "gpt", "value": "The answer is Cunningham, H. and Viazza, P.P. (1996)" } ], "clip_score": 16.125, "loss": 2.4973156452178955 }, { "id": "identity_145794", "image": "DVQA/images/bar_train_00187345.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store had the least sales for the item 'dining'?" }, { "from": "gpt", "value": "The answer is bunk" } ], "clip_score": 31.875, "loss": 2.7285194396972656 }, { "id": "identity_261886", "image": "PlotQA/images/101401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the gap or interval between two consecutive major ticks on the Y-axis?" }, { "from": "gpt", "value": "The answer is 2.0" } ], "clip_score": 18.125, "loss": 2.4501447677612305 }, { "id": "identity_321097", "image": "A-OKVQA/images/69189.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the person on the bike do for a living?\nChoices:\n(A) gives rides\n(B) sells bikes\n(C) sells chickens\n(D) steals bikes" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.046875, "loss": 4.225498676300049 }, { "id": "identity_182046", "image": "PMC-VQA/images/PMC6822922_FIG1_59149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the measurement on the right side of the image in millimeters or centimeters?" }, { "from": "gpt", "value": "The answer is Millimeters" } ], "clip_score": 22.40625, "loss": 2.4298477172851562 }, { "id": "identity_32272", "image": "PMC-VQA/images/PMC4478850_F6_399424.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the tissue indicated by the solid arrow in the image? \nChoices:\n(A) Stele \n(B) Cortex tissue \n(C) Epidermis \n(D) Phloem tissue " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.140625, "loss": 4.869128704071045 }, { "id": "identity_149123", "image": "DocVQA/images/pqnh0078_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the hotel where the meeting is taking place?" }, { "from": "gpt", "value": "The answer is Willard Inter-Continental Hotel" } ], "clip_score": 21.359375, "loss": 1.9444458484649658 }, { "id": "identity_206415", "image": "PMC-VQA/images/PMC6365311_Fig8_432965.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the large curved structure in the center of the image?" }, { "from": "gpt", "value": "The answer is Corpus callosum" } ], "clip_score": 29.5625, "loss": 2.2603931427001953 }, { "id": "identity_49720", "image": "FigureQA/images/41732.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage value of the Red section?" }, { "from": "gpt", "value": "The answer is 75%" } ], "clip_score": 30.609375, "loss": 2.001223087310791 }, { "id": "identity_103181", "image": "CLEVR-Math/images/CLEVR_train_020091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of edges on all the cubes combined?" }, { "from": "gpt", "value": "The answer is 36" } ], "clip_score": 20.4375, "loss": 1.8022783994674683 }, { "id": "identity_326234", "image": "VQA2.0/images/324428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of engine is this?" }, { "from": "gpt", "value": "The answer is outboard motor" } ], "clip_score": 21.859375, "loss": 3.2813785076141357 }, { "id": "identity_138476", "image": "FigureQA/images/67579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times does Light Coral appear above 40 on the y-axis label?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 27.5625, "loss": 1.8792985677719116 }, { "id": "identity_298866", "image": "PMC-VQA/images/PMC6957570_Fig8_99259.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which samples are depicted in figure g? Is it:\n(A) Contralateral to injection site\n(B) Ipsilateral to injection site\n(C) Box f\n(D) Box d" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.578125, "loss": 4.100458145141602 }, { "id": "identity_248112", "image": "VQA2.0/images/465975.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the potential hazards for skiing in this area, and how can they be mitigated?" }, { "from": "gpt", "value": "The answer is Identify hazards and implement safety measures for skiing" } ], "clip_score": 17.21875, "loss": 3.3032724857330322 }, { "id": "identity_42122", "image": "DVQA/images/bar_train_00011913.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the object \"chance\" have more female or lesson preference?" }, { "from": "gpt", "value": "The answer is Female preference" } ], "clip_score": 29.28125, "loss": 2.5418262481689453 }, { "id": "identity_186126", "image": "A-OKVQA/images/154329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pitcher doing in the image?" }, { "from": "gpt", "value": "The answer is Pitching" } ], "clip_score": 26.34375, "loss": 2.369711399078369 }, { "id": "identity_94200", "image": "IconQA/images/76559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of squares, including both the larger squares and the smaller purple squares inside them?" }, { "from": "gpt", "value": "The answer is 128" } ], "clip_score": 33.75, "loss": 1.9886913299560547 }, { "id": "identity_53980", "image": "PlotQA/images/119986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the Middle income category have a higher expenditure on healthcare or R&D?" }, { "from": "gpt", "value": "The answer is Healthcare" } ], "clip_score": 28.65625, "loss": 2.7764923572540283 }, { "id": "identity_326109", "image": "VizWiz/images/VizWiz_train_00013333.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of canned item is shown here?" }, { "from": "gpt", "value": "The answer is pumpkin" } ], "clip_score": 32.65625, "loss": 2.4335391521453857 }, { "id": "identity_230013", "image": "PMC-VQA/images/PMC5423910_F1_161468.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the vertebral levels and describe the anatomical structures affected by the lesion" }, { "from": "gpt", "value": "The answer is Spinal cord, nerve roots, intervertebral discs, vertebral bodies, ligaments" } ], "clip_score": 31.359375, "loss": 1.9661942720413208 }, { "id": "identity_52939", "image": "FigureQA/images/64340.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value reached by the Aqua curve?" }, { "from": "gpt", "value": "The answer is 68" } ], "clip_score": 24.65625, "loss": 2.814239025115967 }, { "id": "identity_286413", "image": "A-OKVQA/images/297910.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which animal are the people observing? \n(A) goats \n(B) lambs \n(C) donkeys \n(D) horses" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 23.765625, "loss": 3.7242531776428223 }, { "id": "identity_131536", "image": "VQA2.0/images/353276.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the standing seagull's beak?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 28.421875, "loss": 3.6801528930664062 }, { "id": "identity_310107", "image": "TQA/images/question_images/human_system_circulatory_3642.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structure sends blood to the lower body?\nChoices:\n(A) Right Atrium\n(B) Descending Aorta\n(C) Superior Vena Cava\n(D) Ascending Aorta" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.640625, "loss": 4.06245756149292 }, { "id": "identity_76199", "image": "ScienceQA/images/8866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organism is at the top of the food chain in this ecosystem?" }, { "from": "gpt", "value": "The answer is Grizzly bear" } ], "clip_score": 34.21875, "loss": 1.8230618238449097 }, { "id": "identity_249386", "image": "Geometry3K/images/1109.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of the remaining angle in the circle, given that the sum of the angles around a point is 360 degrees. Then, determine the value of \\( x \\) and verify if the sum of all angles equals 360 degrees" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 33.09375, "loss": 3.6189754009246826 }, { "id": "identity_219429", "image": "A-OKVQA/images/77953.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the boat closest to the viewer?" }, { "from": "gpt", "value": "The answer is Salvatore" } ], "clip_score": 18.46875, "loss": 3.636004686355591 }, { "id": "identity_264284", "image": "Geometry3K/images/2019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In circle R, if the measure of angle SU is 20 degrees, the measure of angle YW is 20 degrees, and the measure of angle YX is 45 degrees, what is the measure of angle ST? \n(A) 20\n(B) 30\n(C) 45\n(D) 90" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.9375, "loss": 2.223581075668335 }, { "id": "identity_318203", "image": "GeoQA+/images/5526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,点O是△ABC外接圆的圆心,连接OB,若∠1=37°,则∠2的度数是多少?\nChoices:\n(A) 52°\n(B) 51°\n(C) 53°\n(D) 50°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.9375, "loss": 2.5816543102264404 }, { "id": "identity_267756", "image": "A-OKVQA/images/208408.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following phrases is the most suitable choice?\nOptions:\n(A) airplane race\n(B) conflagration\n(C) downpour\n(D) earthquake" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.5546875, "loss": 5.320538520812988 }, { "id": "identity_124806", "image": "VQA-AS/images/14212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the girl's pants?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 19.328125, "loss": 3.5863258838653564 }, { "id": "identity_134429", "image": "PlotQA/images/22053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the prevalence of severe wasting among children of South Africa under 5 years in 2005?" }, { "from": "gpt", "value": "The answer is 3%" } ], "clip_score": 37.21875, "loss": 2.0908124446868896 }, { "id": "identity_21480", "image": "TextVQA/images/1b3078b0f89e7e9f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what was he getting used to in the top right box?" }, { "from": "gpt", "value": "The answer is the smell of freedom" } ], "clip_score": 17.5625, "loss": 3.0589849948883057 }, { "id": "identity_175880", "image": "TabMWP/images/25929.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total increase in record time from 2007 to 2011?" }, { "from": "gpt", "value": "The answer is 5 minutes" } ], "clip_score": 26.1875, "loss": 2.2312088012695312 }, { "id": "identity_14841", "image": "AI2D/images/4162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is needed to turn igneous rock into sediment?\nChoices:\n(A) melting\n(B) weathering and erosion\n(C) compacting\n(D) cooling" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.21875, "loss": 4.021340847015381 }, { "id": "identity_185786", "image": "A-OKVQA/images/442685.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the banners?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 16.640625, "loss": 2.6447832584381104 }, { "id": "identity_326154", "image": "DVQA/images/bar_train_00035349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the bottom in each set?" }, { "from": "gpt", "value": "The answer is beard" } ], "clip_score": 25.3125, "loss": 2.819782018661499 }, { "id": "identity_151531", "image": "AI2D/images/4445.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the item that would be primarily used in a mathematical calculation" }, { "from": "gpt", "value": "The answer is Calculator" } ], "clip_score": 19.1875, "loss": 2.687568426132202 }, { "id": "identity_11265", "image": "UniGeo/images/2682.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,▱ABCD中,∠ABD=50°,AF⊥BC于F,AF交BD于E,点O是DE的中点,连接OA,若DE=2AB,则∠ADB的大小是()\nChoices:\n(A) 25°\n(B) 30°\n(C) 20°\n(D) 35°" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.671875, "loss": 2.400078296661377 }, { "id": "identity_241561", "image": "DocVQA/images/jybx0223_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total export value (Rs. in crore) for all products in the year 2008-09?" }, { "from": "gpt", "value": "The answer is 244.39" } ], "clip_score": 17.921875, "loss": 2.0510785579681396 }, { "id": "identity_53322", "image": "DocVQA/images/gkpf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What year was this publication released by the American Heart Association?" }, { "from": "gpt", "value": "The answer is 1972" } ], "clip_score": 31.140625, "loss": 1.8932348489761353 }, { "id": "identity_52437", "image": "DocVQA/images/ykdv0228_12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the check number for the 'COINS' transaction?" }, { "from": "gpt", "value": "The answer is 383 37" } ], "clip_score": 22.1875, "loss": 3.206312417984009 }, { "id": "identity_136300", "image": "DocVQA/images/syvl0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the degree mentioned for Dr. E. N. McElwee?" }, { "from": "gpt", "value": "The answer is BA in Electrical Engineering" } ], "clip_score": 21.984375, "loss": 2.9456260204315186 }, { "id": "identity_95657", "image": "IconQA/images/52956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of hours and minutes displayed on the clock?" }, { "from": "gpt", "value": "The answer is 9 hours and 0 minutes" } ], "clip_score": 30.578125, "loss": 1.4032779932022095 }, { "id": "identity_225879", "image": "IconQA/images/44714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the slug grows by 50% of its current length, what will be its new length in inches?" }, { "from": "gpt", "value": "The answer is 4.5 inches" } ], "clip_score": 36.125, "loss": 1.9709222316741943 }, { "id": "identity_152788", "image": "AI2D/images/2737.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the phase that appears as a fully illuminated circle?" }, { "from": "gpt", "value": "The answer is Full Moon" } ], "clip_score": 29.96875, "loss": 2.83125376701355 }, { "id": "identity_249272", "image": "IconQA/images/12942.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many grid units has the letter been slid horizontally and vertically?" }, { "from": "gpt", "value": "The answer is 5 horizontally, 0 vertically" } ], "clip_score": 32.0625, "loss": 2.034228801727295 }, { "id": "identity_97808", "image": "IconQA/images/7194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the hour and minute hands?" }, { "from": "gpt", "value": "The answer is 60 degrees" } ], "clip_score": 29.125, "loss": 1.8150835037231445 }, { "id": "identity_223719", "image": "VizWiz/images/VizWiz_train_00014929.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand name of the product in the jar?" }, { "from": "gpt", "value": "The answer is Schwartz" } ], "clip_score": 27.890625, "loss": 3.2946810722351074 }, { "id": "identity_237103", "image": "A-OKVQA/images/527887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the visual indicators that suggest the beverage in the cup is iced coffee, and how do these indicators differentiate it from the other options?" }, { "from": "gpt", "value": "The answer is iced coffee" } ], "clip_score": 26.234375, "loss": 3.7516472339630127 }, { "id": "identity_33053", "image": "DocVQA/images/mqfm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the designation of Robert W. Winters?" }, { "from": "gpt", "value": "The answer is Consultant" } ], "clip_score": 21.109375, "loss": 3.0102736949920654 }, { "id": "identity_254141", "image": "VQA2.0/images/271949.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many wheels are on the carriage and how many horses are pulling it?" }, { "from": "gpt", "value": "The answer is four wheels and one horse" } ], "clip_score": 28.171875, "loss": 1.8982000350952148 }, { "id": "identity_262316", "image": "FigureQA/images/69909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which option correctly describes Seafoam as the smoothest? \n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.84375, "loss": 4.75468635559082 }, { "id": "identity_53048", "image": "DVQA/images/bar_train_00108669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the accuracy of the 'muscle' algorithm on the 'vision' dataset?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 30.703125, "loss": 2.9465770721435547 }, { "id": "identity_103263", "image": "CLEVR-Math/images/CLEVR_train_001773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.5, "loss": 2.4421722888946533 }, { "id": "identity_26226", "image": "IconQA/images/8855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the dotted line a line of symmetry?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.453125, "loss": 3.8442916870117188 }, { "id": "identity_241389", "image": "A-OKVQA/images/492605.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After player 18 hits the ball, which player is most likely to field it first, considering the positions of the players on the field?" }, { "from": "gpt", "value": "The answer is pitcher" } ], "clip_score": 27.171875, "loss": 3.6074612140655518 }, { "id": "identity_262151", "image": "TextVQA/images/c651dfdc529ae969.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me the brand name of the phone?" }, { "from": "gpt", "value": "The answer is nokia" } ], "clip_score": 26.0625, "loss": 2.37394642829895 }, { "id": "identity_77642", "image": "TQA/images/question_images/nuclear_energy_7095.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the control rod in the reactor core?" }, { "from": "gpt", "value": "The answer is To control the rate of the nuclear reaction" } ], "clip_score": 29.4375, "loss": 2.2992916107177734 }, { "id": "identity_178704", "image": "IconQA/images/68558.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the line was drawn using a pen, how many centimeters of ink was used?" }, { "from": "gpt", "value": "The answer is 6 centimeters" } ], "clip_score": 30.9375, "loss": 1.6730846166610718 }, { "id": "identity_197414", "image": "A-OKVQA/images/458308.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the horse's blanket?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 21.1875, "loss": 4.139297962188721 }, { "id": "identity_119202", "image": "A-OKVQA/images/486449.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of transport for the person on the elephant?" }, { "from": "gpt", "value": "The answer is A howdah (elephant seat)" } ], "clip_score": 28.296875, "loss": 2.7066025733947754 }, { "id": "identity_9679", "image": "TQA/images/question_images/nuclear_energy_8117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which fuel is used in the furnace?\nChoices:\n(A) turbine\n(B) transmission\n(C) coal\n(D) oil" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.84375, "loss": 4.153107643127441 }, { "id": "identity_162821", "image": "UniGeo/images/247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, if angle A is 90 degrees and DE is parallel to BC, what is the measure of angle C?" }, { "from": "gpt", "value": "The answer is 90 degrees" } ], "clip_score": 33.5625, "loss": 1.8475878238677979 }, { "id": "identity_3887", "image": "AI2D/images/4156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Igneous rock changes to metamorphic rock by?\nChoices:\n(A) Cooling and crystallizing\n(B) Weathering\n(C) Heating and compacting\n(D) Melting" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.75, "loss": 4.4006123542785645 }, { "id": "identity_186746", "image": "VQA2.0/images/108892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any bicycles visible in the image?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 17.15625, "loss": 3.0877394676208496 }, { "id": "identity_160411", "image": "GeoQA+/images/3855.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the measure of angle AOD in the given diagram?" }, { "from": "gpt", "value": "The answer is 70 degrees" } ], "clip_score": 31.328125, "loss": 2.059478759765625 }, { "id": "identity_62624", "image": "DocVQA/images/ygjm0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is Ulriksdals Wärdshus mentioned as?" }, { "from": "gpt", "value": "The answer is Wärdshus (Inn or Tavern)" } ], "clip_score": 18.140625, "loss": 1.8287781476974487 }, { "id": "identity_191811", "image": "PMC-VQA/images/PMC6820998_Fig1_58533.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate diameter of the largest green fluorescent spot indicated by the red arrow?" }, { "from": "gpt", "value": "The answer is a float number in micrometers (µm)" } ], "clip_score": 32.90625, "loss": 2.6684768199920654 }, { "id": "identity_773", "image": "CLEVR-Math/images/CLEVR_train_022509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all big objects. Subtract all gray cylinders. How many objects are left?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.140625, "loss": 2.188469648361206 }, { "id": "identity_226904", "image": "TQA/images/question_images/glaciers_6933.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the complete journey of a snowflake from its initial fall to its final melting and evaporation, including all the stages and features it encounters along the way" }, { "from": "gpt", "value": "The answer is Snowfall, accumulation, equilibrium line, medial moraine, snout, ablation" } ], "clip_score": 22.390625, "loss": 2.880922317504883 }, { "id": "identity_637", "image": "TextVQA/images/96986e85369e6cb0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: which u.s. highway is referenced?" }, { "from": "gpt", "value": "The answer is 31-w" } ], "clip_score": 18.40625, "loss": 1.5219663381576538 }, { "id": "identity_126704", "image": "A-OKVQA/images/376921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What company logo is displayed on the side of the truck?" }, { "from": "gpt", "value": "The answer is U-Haul" } ], "clip_score": 33.125, "loss": 2.170393466949463 }, { "id": "identity_170456", "image": "TabMWP/images/5106.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the grocery store sell more than 480 bananas?" }, { "from": "gpt", "value": "The answer is Wednesday" } ], "clip_score": 33.1875, "loss": 1.8902064561843872 }, { "id": "identity_92183", "image": "TabMWP/images/8081.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of buttons on 3 coats?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 37.75, "loss": 1.9873785972595215 }, { "id": "identity_85391", "image": "UniGeo/images/3319.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if OD is perpendicular to BC and angle BOD is 60 degrees, what is the length of chord BC if the radius of the circle is 5 units?" }, { "from": "gpt", "value": "The answer is 5 units" } ], "clip_score": 35.78125, "loss": 2.3520171642303467 }, { "id": "identity_304018", "image": "ChartQA/images/multi_col_20531.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value recorded in this bar?" }, { "from": "gpt", "value": "The answer is 52" } ], "clip_score": 18.46875, "loss": 2.857896089553833 }, { "id": "identity_232415", "image": "Geometry3K/images/1949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the value of \\( x \\) and then determine \\( m \\angle CDE \\)" }, { "from": "gpt", "value": "The answer is \\( 47.5^\\circ \\)" } ], "clip_score": 31.484375, "loss": 2.884472131729126 }, { "id": "identity_18096", "image": "VQA-AS/images/17181.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Did someone forget to turn the TV off?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.1875, "loss": 2.893646240234375 }, { "id": "identity_239684", "image": "TQA/images/question_images/human_system_respiratory_6196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pathway of air from the nasal cavity to the lungs, listing all the structures it passes through in order?" }, { "from": "gpt", "value": "The answer is Nasal Cavity, Pharynx, Larynx, Trachea, Bronchi, Lungs" } ], "clip_score": 31.625, "loss": 0.8374170660972595 }, { "id": "identity_3627", "image": "PMC-VQA/images/PMC4632949_fig02_440920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the image showing in the bottom row of the figure? \nChoices:\n(A) Whole heart staining. \n(B) Cell staining. \n(C) Injured tissue. \n(D) Untreated mice. " }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.65625, "loss": 4.215926170349121 }, { "id": "identity_112258", "image": "VQA-RAD/images/synpic23571.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the spine show any signs of degenerative changes?" }, { "from": "gpt", "value": "The answer is No signs of degenerative changes" } ], "clip_score": 22.578125, "loss": 2.3567633628845215 }, { "id": "identity_280141", "image": "DVQA/images/bar_train_00187612.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars from the bottom is labeled as the second one?" }, { "from": "gpt", "value": "The answer is desire" } ], "clip_score": 23.515625, "loss": 3.8965811729431152 }, { "id": "identity_72123", "image": "AI2D/images/602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the diagram, what is the final stage before becoming an adult?" }, { "from": "gpt", "value": "The answer is PUPA" } ], "clip_score": 28.484375, "loss": 2.7059199810028076 }, { "id": "identity_58258", "image": "DVQA/images/bar_train_00153502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of all the bars in the graph?" }, { "from": "gpt", "value": "The answer is (8 + 9 + 3 + 1 + 2 + 1 + 2) / 7 = 3.71" } ], "clip_score": 20.859375, "loss": 1.2532122135162354 }, { "id": "identity_279982", "image": "VQA2.0/images/109450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the temperature outside?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.484375, "loss": 3.7301883697509766 }, { "id": "identity_250627", "image": "VQA-AS/images/16002.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What elements in the image suggest that the genre is romance?" }, { "from": "gpt", "value": "The answer is Romantic gestures, picnic setting, and pleasant atmosphere" } ], "clip_score": 35.8125, "loss": 3.2330079078674316 }, { "id": "identity_229307", "image": "UniGeo/images/896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC中,∠BAC=90°,AC=12,AB=10,D是AC上一个动点,以AD为直径的⊙O交BD于E,若线段CE的最小值为8,求AD的长度。" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 26.5, "loss": 2.54914927482605 }, { "id": "identity_182756", "image": "A-OKVQA/images/152857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the elephants' skin?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.890625, "loss": 4.076515197753906 }, { "id": "identity_302300", "image": "IconQA/images/75502.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many of these are there?" }, { "from": "gpt", "value": "The answer is 30" } ], "clip_score": 15.84375, "loss": 2.173673391342163 }, { "id": "identity_219727", "image": "TextVQA/images/fe531f1bc6187758.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the road indicated by the green street sign?" }, { "from": "gpt", "value": "The answer is Shifu Rd" } ], "clip_score": 39.8125, "loss": 2.005589485168457 }, { "id": "identity_285844", "image": "TabMWP/images/35342.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum number of peppermint candies that Valeria, a candy store employee, placed in jars of different sizes?" }, { "from": "gpt", "value": "The answer is 83" } ], "clip_score": 23.390625, "loss": 3.154571533203125 }, { "id": "identity_229132", "image": "DVQA/images/bar_train_00016282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of people who prefer the object 'wisdom' across all categories, and how does it compare to the total percentage of people who prefer the object 'cut' across all categories?" }, { "from": "gpt", "value": "The answer is Wisdom has a higher total percentage (240%) compared to Cut (140%)" } ], "clip_score": 36.53125, "loss": 1.9025651216506958 }, { "id": "identity_289275", "image": "A-OKVQA/images/430125.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which type of area is the man consuming pizza? \nChoices:\n(A) cow pasture\n(B) midway\n(C) ski lodge\n(D) front yard" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.921875, "loss": 4.5650763511657715 }, { "id": "identity_71019", "image": "TQA/images/question_images/waves_8224.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the term for the lowest point of a wave?" }, { "from": "gpt", "value": "The answer is Trough" } ], "clip_score": 28.984375, "loss": 3.128326177597046 }, { "id": "identity_206877", "image": "PMC-VQA/images/PMC6208962_Fig9_388583.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a presence of a disc herniation in this image?" }, { "from": "gpt", "value": "The answer is Yes, there is a disc herniation present" } ], "clip_score": 29.96875, "loss": 1.7889750003814697 }, { "id": "identity_233239", "image": "ScienceQA/images/6832.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state that borders Minnesota to the south and Wisconsin to the west?" }, { "from": "gpt", "value": "The answer is Des Moines" } ], "clip_score": 29.390625, "loss": 1.8761937618255615 }, { "id": "identity_181937", "image": "CLEVR-Math/images/CLEVR_train_050060.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more shiny sphere is added to the image, how many spheres will there be in total?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.90625, "loss": 2.2000536918640137 }, { "id": "identity_218737", "image": "A-OKVQA/images/561037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 23.671875, "loss": 4.475627899169922 }, { "id": "identity_19339", "image": "FigureQA/images/71703.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Peru have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.890625, "loss": 3.943483352661133 }, { "id": "identity_131332", "image": "TextVQA/images/0ef1e20affd684bf.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many microphones are visible on the lower part of the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.109375, "loss": 2.300212860107422 }, { "id": "identity_168770", "image": "IconQA/images/89237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the bottom row of squares was to continue, what would be the size of the 11th square?" }, { "from": "gpt", "value": "The answer is Small square" } ], "clip_score": 29.078125, "loss": 4.380585193634033 }, { "id": "identity_241098", "image": "TabMWP/images/24746.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Darren buys a spool of gold wire, a roll of purple wrapping paper, and a sheet of animal stickers, how much money will he have left?" }, { "from": "gpt", "value": "The answer is -$0.56" } ], "clip_score": 35.375, "loss": 2.5720319747924805 }, { "id": "identity_300613", "image": "PlotQA/images/101401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference between two consecutive major ticks on this axis?" }, { "from": "gpt", "value": "The answer is 2.0" } ], "clip_score": 19.40625, "loss": 2.58286714553833 }, { "id": "identity_21306", "image": "TextVQA/images/39478eed4e70d4f2.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the first number in the catcher's player number?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 24.078125, "loss": 2.4864561557769775 }, { "id": "identity_111128", "image": "VQA2.0/images/160428.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What pattern is on the awning of the market stall?" }, { "from": "gpt", "value": "The answer is Striped" } ], "clip_score": 28.953125, "loss": 2.6513264179229736 }, { "id": "identity_43245", "image": "DVQA/images/bar_train_00162714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store sold the least number of \"bass\" items?" }, { "from": "gpt", "value": "The answer is Store 3" } ], "clip_score": 31.640625, "loss": 2.313585042953491 }, { "id": "identity_275662", "image": "DocVQA/images/nqpc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is William H. Danforth located?" }, { "from": "gpt", "value": "The answer is Chairman" } ], "clip_score": 18.859375, "loss": 3.638829469680786 }, { "id": "identity_185551", "image": "TextVQA/images/5903269e2a55f1b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What architectural style is the structure on top of the clock tower?" }, { "from": "gpt", "value": "The answer is Modern" } ], "clip_score": 24.328125, "loss": 2.8650708198547363 }, { "id": "identity_46127", "image": "DVQA/images/bar_train_00014852.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average accuracy of all the algorithms shown in the graph?" }, { "from": "gpt", "value": "The answer is 5.25" } ], "clip_score": 36.5625, "loss": 2.047943353652954 }, { "id": "identity_113725", "image": "VQA2.0/images/258249.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many illuminated billboards can be seen in the image?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 19.875, "loss": 4.517640113830566 }, { "id": "identity_199426", "image": "VQA2.0/images/237608.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What objects are on the mantelpiece above the fireplace?" }, { "from": "gpt", "value": "The answer is Three bottles and a potted plant" } ], "clip_score": 27.984375, "loss": 2.802147150039673 }, { "id": "identity_294843", "image": "ChartQA/images/two_col_101216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in euros between 2016 and 2013?" }, { "from": "gpt", "value": "The answer is 20.32" } ], "clip_score": 20.03125, "loss": 2.077362060546875 }, { "id": "identity_317808", "image": "Geometry3K/images/600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Polygons B C D E and R S T U are congruent. Determine w.\nChoices:\n(A) 3\n(B) 10\n(C) 20\n(D) 42" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.4375, "loss": 2.6038923263549805 }, { "id": "identity_184851", "image": "A-OKVQA/images/440534.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the vehicle closest to the snowboarder?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 23.140625, "loss": 3.6068084239959717 }, { "id": "identity_34161", "image": "TextVQA/images/21a9959e9dcb4f97.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what number is the kid in red?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 12.3671875, "loss": 4.360787868499756 }, { "id": "identity_80220", "image": "Geometry3K/images/2078.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the length of segment DE if the radius of the circle is 5 units" }, { "from": "gpt", "value": "The answer is 5 * cos(52) units" } ], "clip_score": 31.453125, "loss": 2.5756659507751465 }, { "id": "identity_89960", "image": "Geometry3K/images/1071.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that trapezoids ACFD and YWVZ are similar, find the value of x" }, { "from": "gpt", "value": "The answer is x = 21/16 * (3y - 1)" } ], "clip_score": 36.875, "loss": 2.2991862297058105 }, { "id": "identity_108392", "image": "Super-CLEVR/images/superCLEVR_new_001932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any object in the image that is not a vehicle?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.4375, "loss": 3.0509445667266846 }, { "id": "identity_54129", "image": "FigureQA/images/42469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have a legend explaining the colors?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 32.0, "loss": 3.2155909538269043 }, { "id": "identity_112478", "image": "TextVQA/images/b995fd81107bc80e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the laptop's keyboard?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 22.0625, "loss": 3.5625319480895996 }, { "id": "identity_224981", "image": "FigureQA/images/84788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many times do the Brown and Dark Orchid lines intersect, and at what approximate x-axis values do these intersections occur?" }, { "from": "gpt", "value": "The answer is Two times, at approximately x = 10 and x = 40" } ], "clip_score": 23.953125, "loss": 2.3618674278259277 }, { "id": "identity_14769", "image": "TQA/images/abc_question_images/earth_eclipses_11647.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which label represents the sun?\nChoices:\n(A) U\n(B) J\n(C) W\n(D) Y" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.34375, "loss": 3.8841919898986816 }, { "id": "identity_246049", "image": "TextVQA/images/4bf43a7b2a898044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many banners are visible in the image, and what are the types of businesses or services advertised on each banner?" }, { "from": "gpt", "value": "The answer is Four banners; Real Estate, Insurance, Food, Life Care" } ], "clip_score": 9.90625, "loss": 3.5607945919036865 }, { "id": "identity_63724", "image": "ScienceQA/images/2911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state to the southwest of the highlighted state?" }, { "from": "gpt", "value": "The answer is California" } ], "clip_score": 31.578125, "loss": 3.2285399436950684 }, { "id": "identity_6278", "image": "FigureQA/images/75486.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Green Yellow intersect Medium Seafoam?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.71875, "loss": 3.72147798538208 }, { "id": "identity_12628", "image": "TextVQA/images/075d182b04a459de.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what brand is this watch?" }, { "from": "gpt", "value": "The answer is lorus" } ], "clip_score": 28.609375, "loss": 2.959502935409546 }, { "id": "identity_233067", "image": "GeoQA+/images/194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在Rt△ABC中,∠BAC=90°,以Rt△ABC的三边为边分别向外作等边三角形△A'BC,△AB'C,△ABC',若△A'BC,△AB'C的面积分别是10和4,求Rt△ABC的面积。" }, { "from": "gpt", "value": "The answer is \\(\\frac{4\\sqrt{30}}{3}\\)" } ], "clip_score": 30.890625, "loss": 2.4733643531799316 }, { "id": "identity_122319", "image": "A-OKVQA/images/60992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the background?" }, { "from": "gpt", "value": "The answer is Two people" } ], "clip_score": 12.390625, "loss": 4.507182598114014 }, { "id": "identity_242705", "image": "PMC-VQA/images/PMC9415991_nanomaterials-12-02873-f001_386532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which areas represent both α and β in the Figure?" }, { "from": "gpt", "value": "The answer is No common area" } ], "clip_score": 18.40625, "loss": 4.392461776733398 }, { "id": "identity_54466", "image": "DVQA/images/bar_train_00138951.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the second highest number of units sold?" }, { "from": "gpt", "value": "The answer is sale" } ], "clip_score": 26.40625, "loss": 4.084114074707031 }, { "id": "identity_66998", "image": "AI2D/images/4177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How can Sedimentary Rock transition into Magma?" }, { "from": "gpt", "value": "The answer is Melting" } ], "clip_score": 31.765625, "loss": 2.9689559936523438 }, { "id": "identity_50674", "image": "FigureQA/images/44146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of Brown in the pie chart?" }, { "from": "gpt", "value": "The answer is Less than 50% but more than 25%" } ], "clip_score": 30.21875, "loss": 1.7633161544799805 }, { "id": "identity_329416", "image": "PMC-VQA/images/PMC7412277_materials-13-03186-f001_230475.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Under what condition was the microstructure shown in the image deformed? \nChoices:\n(A) Solid-solution \n(B) Annealed \n(C) Aged \n(D) Cold-worked" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.296875, "loss": 4.771032333374023 }, { "id": "identity_329972", "image": "A-OKVQA/images/261487.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who was the most recent player of this game to be on the cover of Sports Illustrated?\nChoices:\n(A) naomi osaka\n(B) andre agassi\n(C) monica seles\n(D) serena williams" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.015625, "loss": 4.084259033203125 }, { "id": "identity_55928", "image": "DVQA/images/bar_train_00056383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many datasets have an accuracy greater than 5 for the \"lap\" algorithm?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.640625, "loss": 2.3425447940826416 }, { "id": "identity_10946", "image": "TQA/images/question_images/parts_worm_7300.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the outermost layer of the earthworm called?\nChoices:\n(A) nephrostome\n(B) septum\n(C) cuticle\n(D) coelom" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.3125, "loss": 4.28798770904541 }, { "id": "identity_27892", "image": "DocVQA/images/kzbn0226_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the page no mentioned in this document?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.46875, "loss": 2.890716552734375 }, { "id": "identity_195263", "image": "PMC-VQA/images/PMC3655489_fig2_204832.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the radiolucency observed on the left or right side of the patient's jaw?" }, { "from": "gpt", "value": "The answer is Right side" } ], "clip_score": 28.9375, "loss": 4.120883941650391 }, { "id": "identity_40109", "image": "DVQA/images/bar_train_00018012.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer powder in both categories?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 21.65625, "loss": 2.306513547897339 }, { "id": "identity_45213", "image": "MapQA/images/map_1641.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any states with no data available?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.375, "loss": 2.971076250076294 }, { "id": "identity_271374", "image": "Geometry3K/images/1121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Calculate the perimeter or circumference of the figure and round to the nearest tenth. Which of the following options is the correct answer?\n(A) 12.6\n(B) 24\n(C) 25.1\n(D) 50.2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.8125, "loss": 2.387580394744873 }, { "id": "identity_262563", "image": "PlotQA/images/3907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of dotlines?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.34375, "loss": 2.4702303409576416 }, { "id": "identity_307503", "image": "AI2D/images/4667.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which bird is indicated to use tools according to the diagram?\nChoices:\n(A) Medium ground finch\n(B) Vegetarian finch\n(C) Mangrove finch\n(D) Cocos finch" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.296875, "loss": 3.3609535694122314 }, { "id": "identity_93689", "image": "IconQA/images/13714.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is spun 8 times, how many times would we expect it to not land on pink?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.375, "loss": 2.550063371658325 }, { "id": "identity_272655", "image": "IconQA/images/26086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the height of the plant in terms of the number of cubes?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.734375, "loss": 2.510249376296997 }, { "id": "identity_320067", "image": "VizWiz/images/VizWiz_train_00018770.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this item?" }, { "from": "gpt", "value": "The answer is can beans" } ], "clip_score": 23.234375, "loss": 2.951465129852295 }, { "id": "identity_309987", "image": "Geometry3K/images/1549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, m \\angle 1 = 50 and m \\angle 3 = 60. Determine the measure of \\angle 7.\nChoices:\n(A) 110\n(B) 120\n(C) 130\n(D) 140" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.859375, "loss": 2.328772783279419 }, { "id": "identity_37333", "image": "VQA2.0/images/427914.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day is it?" }, { "from": "gpt", "value": "The answer is afternoon" } ], "clip_score": 10.6796875, "loss": 3.6251869201660156 }, { "id": "identity_264974", "image": "AI2D/images/1388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which part resembles a tube-like structure?\nChoices:\n(A) E\n(B) C\n(C) F\n(D) D" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.71875, "loss": 4.349193572998047 }, { "id": "identity_312459", "image": "TextVQA/images/f62404953e413e65.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this phone?" }, { "from": "gpt", "value": "The answer is hiphone" } ], "clip_score": 27.796875, "loss": 3.227933645248413 }, { "id": "identity_297901", "image": "FigureQA/images/89098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is the roughest? \nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 15.2734375, "loss": 4.025171279907227 }, { "id": "identity_273459", "image": "DocVQA/images/htkm0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the reporter mentioned on the given page?" }, { "from": "gpt", "value": "The answer is johanna dwyer" } ], "clip_score": 21.328125, "loss": 3.1964540481567383 }, { "id": "identity_60480", "image": "DocVQA/images/qkxf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the letter?" }, { "from": "gpt", "value": "The answer is To provide identification numbers and schedules for candidates" } ], "clip_score": 17.46875, "loss": 4.639968395233154 }, { "id": "identity_268213", "image": "TextVQA/images/c28330ba7b770f16.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who wrote the book \"Invisibles\"?" }, { "from": "gpt", "value": "The answer is grant morrison" } ], "clip_score": 19.9375, "loss": 2.584787130355835 }, { "id": "identity_229987", "image": "ScienceQA/images/8339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the physical properties of wet paint that classify it as a liquid, and how do these properties change as it dries?" }, { "from": "gpt", "value": "The answer is Wet paint is a liquid that becomes a solid when dry" } ], "clip_score": 29.5625, "loss": 3.2305893898010254 }, { "id": "identity_100708", "image": "IconQA/images/12447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertices does this shape have?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.25, "loss": 2.179788827896118 }, { "id": "identity_208244", "image": "A-OKVQA/images/532994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the men on the steam engine wearing matching uniforms?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.921875, "loss": 3.0638177394866943 }, { "id": "identity_227953", "image": "IconQA/images/60525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Olivia puts her baby down for an afternoon nap at 1:30 P.M. and the baby sleeps for 2 hours and 45 minutes, what time will the baby wake up?" }, { "from": "gpt", "value": "The answer is 4:15 P.M" } ], "clip_score": 22.796875, "loss": 1.2644459009170532 }, { "id": "identity_160158", "image": "GeoQA+/images/2122.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In rhombus ABCD, with AB = 12 and the ratio of AF to FC is 1:3, what is the area of triangle AEF?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 37.5, "loss": 2.59501576423645 }, { "id": "identity_249251", "image": "TextVQA/images/2d648ed9328308fd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary colors used in the advertisement for the soft drink on this court, and how do they relate to the brand's logo?" }, { "from": "gpt", "value": "The answer is red, blue, and white" } ], "clip_score": 16.21875, "loss": 3.341400146484375 }, { "id": "identity_97592", "image": "IconQA/images/44120.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the letter after the transformation?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 28.4375, "loss": 2.1519742012023926 }, { "id": "identity_74769", "image": "AI2D/images/3259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which organelle is responsible for producing energy in the form of ATP?" }, { "from": "gpt", "value": "The answer is Mitochondrion" } ], "clip_score": 32.15625, "loss": 1.3560163974761963 }, { "id": "identity_272259", "image": "A-OKVQA/images/328935.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for the gathering of all the people? Is it for free food, traveling, a concert, or shopping?\nChoices:\n(A) free food\n(B) traveling\n(C) concert\n(D) shopping" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 16.671875, "loss": 4.423613548278809 }, { "id": "identity_130776", "image": "VQA2.0/images/574971.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What language is the dictionary on the right side of the shelf?" }, { "from": "gpt", "value": "The answer is Spanish" } ], "clip_score": 24.421875, "loss": 3.3951010704040527 }, { "id": "identity_185738", "image": "PMC-VQA/images/PMC6711709_fig5_29123.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of image is this?" }, { "from": "gpt", "value": "The answer is Electron Microscopy" } ], "clip_score": 31.109375, "loss": 2.5136334896087646 }, { "id": "identity_302231", "image": "IconQA/images/104023.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many shapes are there?\nChoices:\n(A) 5\n(B) 2\n(C) 1\n(D) 4\n(E) 3" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.609375, "loss": 3.2647767066955566 }, { "id": "identity_45956", "image": "FigureQA/images/47862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate ratio of Periwinkle to Orange in the pie chart?" }, { "from": "gpt", "value": "The answer is Periwinkle is greater than Orange" } ], "clip_score": 38.34375, "loss": 2.055304765701294 }, { "id": "identity_318653", "image": "UniGeo/images/495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图所示,圆锥的母线长为10cm,其侧面展开图是圆心角为216°的扇形,则该圆锥的高是多少?\n\n选项:\n(A) 5cm\n(B) 6cm\n(C) 7cm\n(D) 8cm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.90625, "loss": 2.234757900238037 }, { "id": "identity_17612", "image": "IconQA/images/75873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a paw. Which picture is third?\nChoices:\n(A) house\n(B) dog\n(C) paw" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.859375, "loss": 4.606442451477051 }, { "id": "identity_186055", "image": "A-OKVQA/images/457119.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any pigeons in flight in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.296875, "loss": 3.083683490753174 }, { "id": "identity_51609", "image": "PlotQA/images/75073.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the unit of measurement used on the x-axis of the graph?" }, { "from": "gpt", "value": "The answer is Mt of CO2 equivalent" } ], "clip_score": 25.75, "loss": 2.8637046813964844 }, { "id": "identity_322911", "image": "PMC-VQA/images/PMC8586710_F11_110541.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the visible structure on this surface?\nChoices:\n(A) Stomata \n(B) Root hairs \n(C) Salt glands \n(D) Cuticle layers" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.796875, "loss": 5.143212795257568 }, { "id": "identity_302513", "image": "DVQA/images/bar_train_00090190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which algorithm has the smallest accuracy summed across these datasets?" }, { "from": "gpt", "value": "The answer is mail" } ], "clip_score": 33.84375, "loss": 3.4700241088867188 }, { "id": "identity_58759", "image": "DVQA/images/bar_train_00089909.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which store sold the least number of 'whisky' units?" }, { "from": "gpt", "value": "The answer is dust" } ], "clip_score": 31.71875, "loss": 4.143820762634277 }, { "id": "identity_91992", "image": "IconQA/images/38158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one more car is added to the first row, how many cars will be there in total?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 29.078125, "loss": 1.8451224565505981 }, { "id": "identity_118412", "image": "VQA2.0/images/279131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing item is the person third from the right wearing on their lower body?" }, { "from": "gpt", "value": "The answer is Skirt" } ], "clip_score": 16.875, "loss": 3.4363536834716797 }, { "id": "identity_107697", "image": "PMC-VQA/images/PMC8195437_ppat.1009632.g003_464513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique was used in this image?" }, { "from": "gpt", "value": "The answer is Staining technique based on color and pattern" } ], "clip_score": 22.25, "loss": 3.418062925338745 }, { "id": "identity_248220", "image": "TabMWP/images/24823.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of schools in the towns listed?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 33.8125, "loss": 2.5230894088745117 }, { "id": "identity_63672", "image": "TQA/images/abc_question_images/parts_neuron_17206.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'V' represent in this neuron diagram?" }, { "from": "gpt", "value": "The answer is Y" } ], "clip_score": 33.28125, "loss": 3.4847474098205566 }, { "id": "identity_195845", "image": "PMC-VQA/images/PMC3813812_f3-ol-06-05-1499_240336.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any other abnormalities in the brain aside from the mass?" }, { "from": "gpt", "value": "The answer is No other visible abnormalities" } ], "clip_score": 26.96875, "loss": 3.2780063152313232 }, { "id": "identity_200116", "image": "TextVQA/images/2d406449b97759a7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the contact number provided for more information?" }, { "from": "gpt", "value": "The answer is 028 9032 0184" } ], "clip_score": 15.390625, "loss": 1.7388615608215332 }, { "id": "identity_128863", "image": "PMC-VQA/images/PMC8117624_Fig2_439345.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant shade of the image?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.546875, "loss": 4.730657577514648 }, { "id": "identity_311248", "image": "DocVQA/images/gtmd0081_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much was spent in total?" }, { "from": "gpt", "value": "The answer is $ 17.15" } ], "clip_score": 23.21875, "loss": 1.9631973505020142 }, { "id": "identity_234562", "image": "ScienceQA/images/2776.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Explain how grasshoppers use their physical abilities to evade predators and provide two specific examples of these abilities in action" }, { "from": "gpt", "value": "The answer is Jumping and flying help grasshoppers evade predators effectively" } ], "clip_score": 31.5625, "loss": 3.0663881301879883 }, { "id": "identity_185193", "image": "PMC-VQA/images/PMC7249772_FIG2_178143.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any surgical implants visible in the X-ray?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 35.21875, "loss": 3.0219619274139404 }, { "id": "identity_175275", "image": "IconQA/images/64822.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one oval is removed, will the remaining number of ovals be even or odd?" }, { "from": "gpt", "value": "The answer is Odd" } ], "clip_score": 21.75, "loss": 2.450937509536743 }, { "id": "identity_117581", "image": "VQA2.0/images/63306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is prominently displayed on the skateboarder's t-shirt?" }, { "from": "gpt", "value": "The answer is ZERO" } ], "clip_score": 25.109375, "loss": 2.964691162109375 }, { "id": "identity_42380", "image": "PlotQA/images/42912.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many countries are represented in this graph?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 24.78125, "loss": 2.538780927658081 }, { "id": "identity_157735", "image": "ScienceQA/images/19284.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how many degrees Celsius did the temperature of the battery in the less expensive phone decrease?" }, { "from": "gpt", "value": "The answer is 14°C" } ], "clip_score": 23.171875, "loss": 2.333799362182617 }, { "id": "identity_70073", "image": "TQA/images/abc_question_images/human_system_muscular_16161.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the muscle corresponding to label 'X' in this image" }, { "from": "gpt", "value": "The answer is Pectoralis major muscle" } ], "clip_score": 30.0625, "loss": 1.7925382852554321 }, { "id": "identity_29156", "image": "TextVQA/images/d4fbce59a74dc74c.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the yellow big thing they are cooking?" }, { "from": "gpt", "value": "The answer is answering does not require reading text in the image" } ], "clip_score": 24.609375, "loss": 4.8724517822265625 }, { "id": "identity_264384", "image": "VQA2.0/images/393503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this circus travel from place to place?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 16.375, "loss": 3.225996732711792 }, { "id": "identity_333263", "image": "IconQA/images/35303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Are there an even or odd number of circles?\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.875, "loss": 3.878657817840576 }, { "id": "identity_164761", "image": "Geometry3K/images/815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the diagonal connecting the vertices with sides 2x - 4 and 3y - 2 if x = 8 and y = 26/3?" }, { "from": "gpt", "value": "The answer is the diagonal length is 18" } ], "clip_score": 35.125, "loss": 2.83980393409729 }, { "id": "identity_11617", "image": "Geometry3K/images/891.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find H K.\nChoices:\n(A) 10\n(B) 12\n(C) 15\n(D) 20" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.1875, "loss": 3.009014844894409 }, { "id": "identity_67246", "image": "TQA/images/question_images/human_system_reproductory_7017.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the external opening at the end of the male urethra?" }, { "from": "gpt", "value": "The answer is Urinary meatus" } ], "clip_score": 29.28125, "loss": 2.138671398162842 }, { "id": "identity_303506", "image": "ScienceQA/images/7258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Reno\n(B) Carson City\n(C) Baton Rouge\n(D) Jefferson City" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.171875, "loss": 4.501611709594727 }, { "id": "identity_2321", "image": "TextVQA/images/84b3fc5b08f24dba.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is being sold at the stall on the left?" }, { "from": "gpt", "value": "The answer is bun mang vit" } ], "clip_score": 25.359375, "loss": 4.74965238571167 }, { "id": "identity_49536", "image": "DVQA/images/bar_train_00043429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for the item \"feel\" across all stores?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 26.84375, "loss": 2.4510579109191895 }, { "id": "identity_158337", "image": "ScienceQA/images/17802.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does this image likely depict based on the lighting and shadows?" }, { "from": "gpt", "value": "The answer is Likely around midday" } ], "clip_score": 13.5546875, "loss": 3.254567861557007 }, { "id": "identity_180588", "image": "IconQA/images/4670.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the twig grows 1 inch longer, what will be its new length?" }, { "from": "gpt", "value": "The answer is 5 inches" } ], "clip_score": 33.6875, "loss": 2.2507412433624268 }, { "id": "identity_145382", "image": "DocVQA/images/mgyk0226_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount requested for animal and animal care costs?" }, { "from": "gpt", "value": "The answer is $1400" } ], "clip_score": 23.390625, "loss": 2.32308292388916 }, { "id": "identity_105123", "image": "TextVQA/images/a51887558259c0cd.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more stalls on the left or the right side of the aisle?" }, { "from": "gpt", "value": "The answer is Left side" } ], "clip_score": 23.71875, "loss": 3.41900372505188 }, { "id": "identity_323138", "image": "A-OKVQA/images/509339.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the rectangular object on the wall allow for?\nChoices:\n(A) storage\n(B) vision\n(C) water flow\n(D) electrical power" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 10.3359375, "loss": 4.5369439125061035 }, { "id": "identity_148048", "image": "DVQA/images/bar_train_00162999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the 'volume' category have a negative value for any group?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.59375, "loss": 3.2294578552246094 }, { "id": "identity_134771", "image": "DocVQA/images/mnbx0223_75.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of Proposed Dividend for the year ended 31st March, 2007?" }, { "from": "gpt", "value": "The answer is 1166.29" } ], "clip_score": 26.359375, "loss": 2.320254325866699 }, { "id": "identity_229630", "image": "VQA2.0/images/291140.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What safety measures should be in place for small children to participate in this activity?" }, { "from": "gpt", "value": "The answer is Supervision, protective gear, safe area, kite safety, mild wind" } ], "clip_score": 22.484375, "loss": 3.3048174381256104 }, { "id": "identity_276087", "image": "PMC-VQA/images/PMC7360319_F1_214021.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to visualize the lesion in the right anteromedial temporal lobe?\nChoices:\n(A) SWI\n(B) GRE\n(C) T2 fluid attenuated inversion recovery\n(D) DSC-MRI" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.546875, "loss": 3.7942802906036377 }, { "id": "identity_182752", "image": "PMC-VQA/images/PMC7648378_Fig1_293222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any surgical markers be identified in the X-ray image?" }, { "from": "gpt", "value": "The answer is No surgical markers visible" } ], "clip_score": 29.765625, "loss": 3.438908576965332 }, { "id": "identity_31553", "image": "ScienceQA/images/2630.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter marks the Louisiana territory?\nChoices:\n(A) A\n(B) D\n(C) C\n(D) B" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 30.40625, "loss": 4.025464057922363 }, { "id": "identity_321052", "image": "PMC-VQA/images/PMC9182059_molecules-27-03631-f005_306952.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the double arrow in this indicate? \nChoices:\n(A) Expansion \n(B) Immobilization \n(C) Control \n(D) Double magnification" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.140625, "loss": 4.467520713806152 }, { "id": "identity_111623", "image": "Super-CLEVR/images/superCLEVR_new_000241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the bicycle standing upright or lying down?" }, { "from": "gpt", "value": "The answer is Standing upright" } ], "clip_score": 24.65625, "loss": 2.19073486328125 }, { "id": "identity_226389", "image": "A-OKVQA/images/221446.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify each type of vegetable shown in the image and list them" }, { "from": "gpt", "value": "The answer is Carrots, broccoli, leeks" } ], "clip_score": 31.75, "loss": 2.1117796897888184 }, { "id": "identity_44540", "image": "DocVQA/images/fmdv0228_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the clerk's initials who checked in the guest?" }, { "from": "gpt", "value": "The answer is RU" } ], "clip_score": 14.875, "loss": 3.296144485473633 }, { "id": "identity_215383", "image": "PMC-VQA/images/PMC8502242_FIG1_79241.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique is used in this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.71875, "loss": 2.777594804763794 }, { "id": "identity_206759", "image": "A-OKVQA/images/289100.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any fruits in the image that are not covered or hanging?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.125, "loss": 3.0276124477386475 }, { "id": "identity_77367", "image": "TQA/images/abc_question_images/parts_neuron_17227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label corresponds to the part of the neuron that is known as the axon terminal?" }, { "from": "gpt", "value": "The answer is Y" } ], "clip_score": 33.0625, "loss": 2.473464250564575 }, { "id": "identity_71699", "image": "AI2D/images/515.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What role does the Fungus play in this food web?" }, { "from": "gpt", "value": "The answer is Decomposer" } ], "clip_score": 35.59375, "loss": 2.51873517036438 }, { "id": "identity_212681", "image": "PMC-VQA/images/PMC5058823_F2_82761.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this image show any signs of interstitial lung disease?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 34.21875, "loss": 2.638596534729004 }, { "id": "identity_209322", "image": "PMC-VQA/images/PMC4567364_f03_422925.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique was used to produce this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 32.15625, "loss": 2.405912160873413 }, { "id": "identity_103055", "image": "CLEVR-Math/images/CLEVR_train_016086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many unique shapes are present in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 23.828125, "loss": 2.4101154804229736 }, { "id": "identity_332095", "image": "PMC-VQA/images/PMC5648941_F1_229187.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the name of the fracture shown in this image?\nChoices:\n(A) Colles fracture \n(B) Intertrochanteric fracture \n(C) Femoral neck fracture \n(D) Greenstick fracture" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.21875, "loss": 4.758563995361328 }, { "id": "identity_127025", "image": "PMC-VQA/images/PMC8037832_Fig1_414845.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the anatomical structure indicated by the yellow arrow?" }, { "from": "gpt", "value": "The answer is Right lung nodule" } ], "clip_score": 31.6875, "loss": 4.172247886657715 }, { "id": "identity_22389", "image": "PMC-VQA/images/PMC6440287_F1_459012.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of imaging method was used in the study?\nChoices:\n(A) Computed Tomography (CT) scan \n(B) Magnetic Resonance Imaging (MRI) scan \n(C) Positron Emission Tomography (PET) scan \n(D) Ultrasound scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.359375, "loss": 4.152700424194336 }, { "id": "identity_39466", "image": "FigureQA/images/52866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Violet Red occupy more than a quarter of the pie chart?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 39.125, "loss": 3.0541346073150635 }, { "id": "identity_306172", "image": "TQA/images/abc_question_images/ocean_waves_19154.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: By what letter is the wavelength represented in this diagram?\n\nChoices:\n(A) D\n(B) U\n(C) C\n(D) S" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.3125, "loss": 3.3042874336242676 }, { "id": "identity_106824", "image": "VQA-AS/images/12771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geometric shape is the rug primarily composed of?" }, { "from": "gpt", "value": "The answer is Rectangle" } ], "clip_score": 21.96875, "loss": 2.9585835933685303 }, { "id": "identity_278526", "image": "DVQA/images/bar_train_00193657.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do more people prefer the object whole over nature?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 30.796875, "loss": 3.2696533203125 }, { "id": "identity_43287", "image": "DocVQA/images/gfyc0227_145.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the document?" }, { "from": "gpt", "value": "The answer is MERITHIOLATE, IODINE, FORMALDEHYDE TECHNIQUE FOR PROTOZOA IN FECES" } ], "clip_score": 20.9375, "loss": 2.78941011428833 }, { "id": "identity_72318", "image": "TQA/images/abc_question_images/human_system_reproductory_17028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the structure labeled 'M' in the context of the male reproductive system" }, { "from": "gpt", "value": "The answer is Scrotum" } ], "clip_score": 31.90625, "loss": 2.2439045906066895 }, { "id": "identity_230602", "image": "VizWiz/images/VizWiz_train_00013954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: List all the kitchen appliances visible in the image and describe their positions relative to each other" }, { "from": "gpt", "value": "The answer is Iron, toaster, juicer, kettle; left to right" } ], "clip_score": 27.984375, "loss": 2.9579813480377197 }, { "id": "identity_57273", "image": "DocVQA/images/fnxh0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the factory mentioned in the document?" }, { "from": "gpt", "value": "The answer is Longmont Factory" } ], "clip_score": 22.71875, "loss": 5.070233345031738 }, { "id": "identity_98023", "image": "IconQA/images/53216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rectangles are there for every triangle in the image?" }, { "from": "gpt", "value": "The answer is 1.125" } ], "clip_score": 32.53125, "loss": 2.1483817100524902 }, { "id": "identity_295264", "image": "TextVQA/images/cd83b6c8254e0f7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the duration of this movie?" }, { "from": "gpt", "value": "The answer is 110 minutes" } ], "clip_score": 15.4609375, "loss": 2.01116681098938 }, { "id": "identity_59155", "image": "MapQA/images/map_388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage range of tires in Texas?" }, { "from": "gpt", "value": "The answer is 51.4%-56.8%" } ], "clip_score": 32.78125, "loss": 1.7477000951766968 }, { "id": "identity_143743", "image": "DocVQA/images/kzlf0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is involved in the appointment method for the Nutrition Committee members?" }, { "from": "gpt", "value": "The answer is The Steering Committee Chairman and Nutrition Committee Chairman" } ], "clip_score": 28.0625, "loss": 1.9437037706375122 }, { "id": "identity_281635", "image": "TQA/images/question_images/seasons_2638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What event occurs on September 23rd?\nChoices:\n(A) Fall Equinox\n(B) Fall Solstice\n(C) Spring Equinox\n(D) Summer Solstice" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 37.84375, "loss": 3.865788221359253 }, { "id": "identity_124193", "image": "VQA2.0/images/178268.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is on the horse's coat?" }, { "from": "gpt", "value": "The answer is Pinto" } ], "clip_score": 24.5, "loss": 3.133474588394165 }, { "id": "identity_120667", "image": "TextVQA/images/42132546ea298db4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What famous bridge is depicted in the background of the image?" }, { "from": "gpt", "value": "The answer is Golden Gate Bridge" } ], "clip_score": 16.921875, "loss": 2.3528332710266113 }, { "id": "identity_113967", "image": "VQA-AS/images/4226.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of ball is the child on the right holding?" }, { "from": "gpt", "value": "The answer is American football" } ], "clip_score": 19.109375, "loss": 2.9730682373046875 }, { "id": "identity_107298", "image": "TextVQA/images/e9571753a3c78e3b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players are wearing white jerseys?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 24.140625, "loss": 2.294780969619751 }, { "id": "identity_217165", "image": "VizWiz/images/VizWiz_train_00001108.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the laptop?" }, { "from": "gpt", "value": "The answer is Apple" } ], "clip_score": 31.421875, "loss": 3.6258385181427 }, { "id": "identity_306725", "image": "AI2D/images/4108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What gas is released into the air in this process?\nChoices:\n(A) Oxygen\n(B) Sulfur\n(C) Calcium\n(D) Carbon dioxide" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.0, "loss": 4.094152927398682 }, { "id": "identity_24310", "image": "IconQA/images/38349.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) turn\n(C) flip" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.3125, "loss": 4.322114944458008 }, { "id": "identity_248329", "image": "Geometry3K/images/1203.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the angles in the triangle, and what is the value of x?" }, { "from": "gpt", "value": "The answer is 91" } ], "clip_score": 32.625, "loss": 3.562957763671875 }, { "id": "identity_68253", "image": "AI2D/images/3751.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled directly above the lungs?" }, { "from": "gpt", "value": "The answer is Trachea (windpipe)" } ], "clip_score": 32.5, "loss": 1.9897663593292236 }, { "id": "identity_3925", "image": "Geometry3K/images/935.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y.\nChoices:\n(A) \\sqrt { 285 }\n(B) \\sqrt { 510 }\n(C) 2 \\sqrt { 285 }\n(D) 2 \\sqrt { 510 }" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.421875, "loss": 3.058154582977295 }, { "id": "identity_166422", "image": "TabMWP/images/9160.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more girls are there in Mr. Center's class than in Miss West's class?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 33.4375, "loss": 2.141578435897827 }, { "id": "identity_211246", "image": "PMC-VQA/images/PMC2726940_pone-0006738-g006_43617.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is labeled with the letter 'C' in the image?" }, { "from": "gpt", "value": "The answer is Liver lobe" } ], "clip_score": 24.859375, "loss": 4.296097278594971 }, { "id": "identity_159438", "image": "GeoQA+/images/3054.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠CAB is 50°, what is the measure of ∠OAC?" }, { "from": "gpt", "value": "The answer is 25°" } ], "clip_score": 34.15625, "loss": 1.7394670248031616 }, { "id": "identity_54942", "image": "FigureQA/images/68914.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what point on the x-axis does Orchid reach its minimum value?" }, { "from": "gpt", "value": "The answer is 100" } ], "clip_score": 19.234375, "loss": 2.0125982761383057 }, { "id": "identity_232151", "image": "UniGeo/images/1146.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC内接于⊙O,∠A=26°,求∠BOC的度数。" }, { "from": "gpt", "value": "The answer is 52°" } ], "clip_score": 30.703125, "loss": 2.142125368118286 }, { "id": "identity_25688", "image": "A-OKVQA/images/427091.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How is the child crossing the street seating making progress?\nChoices:\n(A) being pulled\n(B) mechanized wheelchair\n(C) being pushed\n(D) sheer will" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.453125, "loss": 4.169147491455078 }, { "id": "identity_146286", "image": "FigureQA/images/5062.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between the bars labeled \"Indigo\" and \"Dark Slate\"?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 27.53125, "loss": 2.1137092113494873 }, { "id": "identity_57670", "image": "FigureQA/images/4964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 33.59375, "loss": 3.586113452911377 }, { "id": "identity_174111", "image": "TabMWP/images/6794.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of all y-values in the table?" }, { "from": "gpt", "value": "The answer is 38" } ], "clip_score": 29.953125, "loss": 2.1226582527160645 }, { "id": "identity_192061", "image": "PMC-VQA/images/PMC4109997_pone-0103283-g004_308561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level used to capture this image?" }, { "from": "gpt", "value": "The answer is Not specified in the image" } ], "clip_score": 24.96875, "loss": 3.341066598892212 }, { "id": "identity_309322", "image": "Geometry3K/images/676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In \\triangle E D F, T is the centroid and F T = 12. Determine T Q.\nChoices:\n(A) 3\n(B) 6\n(C) 12\n(D) 24" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.65625, "loss": 2.480808734893799 }, { "id": "identity_284201", "image": "PMC-VQA/images/PMC4377159_Fig1_372559.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What was discovered in the patient's left neck area?\nChoices:\n(A) Adenocarcinoma\n(B) Hodgkin Lymphoma\n(C) Tonsillitis\n(D) Leukemia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.6875, "loss": 4.959186553955078 }, { "id": "identity_92701", "image": "TabMWP/images/8291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Sophie wants to buy the yellow dress and the pair of boots, how much money does she need in total?" }, { "from": "gpt", "value": "The answer is $50.00" } ], "clip_score": 31.953125, "loss": 1.8568607568740845 }, { "id": "identity_109720", "image": "TextVQA/images/52c8b4a51a94f8ae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the cup on the table?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 15.4453125, "loss": 3.9703993797302246 }, { "id": "identity_158452", "image": "ScienceQA/images/4471.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Assuming no disturbances to the layers, which fossil is the most recent?" }, { "from": "gpt", "value": "The answer is Butterfly fossil" } ], "clip_score": 32.46875, "loss": 2.247943878173828 }, { "id": "identity_118864", "image": "PMC-VQA/images/PMC7699868_nutrients-12-03566-f003_309851.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging technique was used to capture this image?" }, { "from": "gpt", "value": "The answer is Endoscopy" } ], "clip_score": 27.65625, "loss": 1.936513066291809 }, { "id": "identity_79806", "image": "UniGeo/images/42.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that AB is the diameter of circle O and angle ABC is 70 degrees, what is the measure of angle ADB?" }, { "from": "gpt", "value": "The answer is 10 degrees" } ], "clip_score": 35.53125, "loss": 2.0383589267730713 }, { "id": "identity_335640", "image": "PMC-VQA/images/PMC2651859_F7_35486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which vessel is affected by hypoplasia based on the imaging in this figure?\nChoices:\n(A) Superior Vena Cava \n(B) Inferior Vena Cava \n(C) Pulmonary Vein \n(D) Pulmonary Artery" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.359375, "loss": 3.461247682571411 }, { "id": "identity_290155", "image": "MapQA/images/map_7498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value in the South?" }, { "from": "gpt", "value": "The answer is 18.4%-29.2%" } ], "clip_score": 23.1875, "loss": 2.1927196979522705 }, { "id": "identity_102135", "image": "IconQA/images/105318.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If both clocks are working correctly, which friend's clock is faster based on the time shown?" }, { "from": "gpt", "value": "The answer is Rosa's clock" } ], "clip_score": 22.359375, "loss": 2.190315008163452 }, { "id": "identity_244083", "image": "PlotQA/images/89716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which region had the highest cost as a percentage of GNI in 1996, and what was the percentage?" }, { "from": "gpt", "value": "The answer is Small states, 0.55%" } ], "clip_score": 21.09375, "loss": 3.2902143001556396 }, { "id": "identity_22824", "image": "A-OKVQA/images/498669.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How full does he hope the boat will be at the end of the day?\nChoices:\n(A) half full\n(B) full\n(C) empty\n(D) quarter full" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 15.5, "loss": 4.424255847930908 }, { "id": "identity_288106", "image": "A-OKVQA/images/517017.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which country's flag is visible near the building among the following options: United Kingdom, Russia, United States, or France?\nChoices:\n(A) united kingdom\n(B) russia\n(C) united states\n(D) france" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 12.8046875, "loss": 4.006685256958008 }, { "id": "identity_139704", "image": "DocVQA/images/jzgj0223_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total assets value for the Annual Finance Standalone Dec-09?" }, { "from": "gpt", "value": "The answer is 307.6" } ], "clip_score": 26.03125, "loss": 2.4544460773468018 }, { "id": "identity_38424", "image": "UniGeo/images/1773.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC为钝角三角形,将△ABC绕点A按逆时针方向旋转120°得到△AB′C′,连接BB′,若AC′∥BB′,则∠CAB′的度数为()\nChoices:\n(A) 45°\n(B) 60°\n(C) 70°\n(D) 90°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.15625, "loss": 2.7609243392944336 }, { "id": "identity_67012", "image": "ScienceQA/images/17376.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary feathers can be seen on the bird's left wing?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 20.59375, "loss": 2.241589069366455 }, { "id": "identity_229173", "image": "TextVQA/images/5477f897677ff2b0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the artists involved in the album and what are their roles?" }, { "from": "gpt", "value": "The answer is Masato Mori (guitar/vocal), Tomoyuki Fujii (alto sax/flute)" } ], "clip_score": 27.359375, "loss": 2.190150499343872 }, { "id": "identity_78003", "image": "AI2D/images/4121.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the roots as shown in the image?" }, { "from": "gpt", "value": "The answer is Absorb water and minerals" } ], "clip_score": 24.421875, "loss": 1.6441677808761597 }, { "id": "identity_214256", "image": "Super-CLEVR/images/superCLEVR_new_000414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles in the image that are the same color as the plane?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.984375, "loss": 3.4834113121032715 }, { "id": "identity_6675", "image": "GeoQA+/images/1771.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在等边△ABC中,D、E分别是边AB、BC的中点,DE=2,则△ABC的周长为()\nChoices:\n(A) 9\n(B) 12\n(C) 16\n(D) 18" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.765625, "loss": 2.5380513668060303 }, { "id": "identity_60158", "image": "FigureQA/images/95986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Dark Red at x-axis label 60?" }, { "from": "gpt", "value": "The answer is 48" } ], "clip_score": 25.734375, "loss": 2.8944478034973145 }, { "id": "identity_21401", "image": "TabMWP/images/30862.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Students at Kingwood Middle School wrote and submitted poems for a district-wide writing contest. How many students wrote fewer than 4 poems?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 29.203125, "loss": 2.3046364784240723 }, { "id": "identity_287013", "image": "DocVQA/images/xhhk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much does a round trip from Champaign to Columbus cost?" }, { "from": "gpt", "value": "The answer is $71.06" } ], "clip_score": 23.8125, "loss": 2.4658122062683105 }, { "id": "identity_323201", "image": "PMC-VQA/images/PMC6711278_f1_29028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What did the colour Doppler show?\nChoices:\n(A) Central vascularity \n(B) Peripheral vascularity \n(C) No vascularity \n(D) Interstitial vascularity" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.453125, "loss": 4.789353847503662 }, { "id": "identity_94468", "image": "TabMWP/images/10856.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more pieces of mail were delivered on Tuesday than on Thursday?" }, { "from": "gpt", "value": "The answer is 552" } ], "clip_score": 37.40625, "loss": 2.427966594696045 }, { "id": "identity_225719", "image": "UniGeo/images/355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,点C在⊙O上,若∠ACB=35°,且弧AC的长度是弧CB的两倍,则∠AOB等于多少度?" }, { "from": "gpt", "value": "The answer is 240°" } ], "clip_score": 33.375, "loss": 2.168407917022705 }, { "id": "identity_321017", "image": "IconQA/images/56201.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first image is a sub. What is the fourth image?\nChoices:\n(A) fish\n(B) turtle\n(C) sub" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.4375, "loss": 3.893759250640869 }, { "id": "identity_156145", "image": "ScienceQA/images/17200.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the chair have armrests?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.453125, "loss": 3.3777928352355957 }, { "id": "identity_332816", "image": "A-OKVQA/images/440032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of passenger service can be found on this street?\nChoices:\n(A) tram\n(B) subway\n(C) ferry\n(D) bus" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.828125, "loss": 4.14669942855835 }, { "id": "identity_32276", "image": "Super-CLEVR/images/superCLEVR_new_000606.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there fewer big blue metal utility bikes than tiny brown metallic tandem bikes?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 20.65625, "loss": 4.77104377746582 }, { "id": "identity_107590", "image": "VQA2.0/images/155309.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the street indicated by the sign at the top?" }, { "from": "gpt", "value": "The answer is Prescott Ave" } ], "clip_score": 25.5, "loss": 2.093902587890625 }, { "id": "identity_264464", "image": "IconQA/images/88484.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine if the number of stars is even or odd.\nChoices:\n(A) even\n(B) odd" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.984375, "loss": 4.054062843322754 }, { "id": "identity_132568", "image": "TextVQA/images/354909ac2c1850be.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many banners are visible on the building facade?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 20.625, "loss": 2.310312032699585 }, { "id": "identity_282433", "image": "PMC-VQA/images/PMC7442462_F2_238763.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of lesion, besides the ones mentioned, was detected in the brain MRI?\nChoices:\n(A) Hypointense lesion \n(B) Hyperintense lesion \n(C) Isointense lesion \n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.546875, "loss": 3.8969082832336426 }, { "id": "identity_294398", "image": "PMC-VQA/images/PMC6832264_jcm-08-01608-f001_62402.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of MRI scans were utilized for the diagnosis?\nOptions:\n(A) T1 weighted images\n(B) T3 weighted images\n(C) T2 weighted images\n(D) T4 weighted images" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.375, "loss": 3.961542844772339 }, { "id": "identity_231807", "image": "FigureQA/images/17631.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage difference between Dark Salmon and Maroon?" }, { "from": "gpt", "value": "The answer is 20%" } ], "clip_score": 35.5, "loss": 1.6693873405456543 }, { "id": "identity_328686", "image": "TextVQA/images/2d867feeb54a7eea.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of this?" }, { "from": "gpt", "value": "The answer is bigger" } ], "clip_score": 13.2890625, "loss": 3.657144069671631 }, { "id": "identity_155586", "image": "ScienceQA/images/14177.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What river can be seen flowing through the canyon in this image?" }, { "from": "gpt", "value": "The answer is Colorado River" } ], "clip_score": 30.75, "loss": 2.400538682937622 }, { "id": "identity_302590", "image": "PlotQA/images/83735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do the lines representing expenditure in healthcare and R&D intersect?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.75, "loss": 2.878622531890869 }, { "id": "identity_207211", "image": "A-OKVQA/images/382659.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the body of water shown in the image?" }, { "from": "gpt", "value": "The answer is A river" } ], "clip_score": 18.59375, "loss": 3.3685295581817627 }, { "id": "identity_36066", "image": "FigureQA/images/32678.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Salmon the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.40625, "loss": 4.120534896850586 }, { "id": "identity_208100", "image": "A-OKVQA/images/26260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many raw meat pieces are there on the green plate?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 16.765625, "loss": 4.32177209854126 }, { "id": "identity_274179", "image": "PMC-VQA/images/PMC6713910_F6_30138.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging technique was utilized to differentiate between various cell types in the brain?\nChoices:\n(A) X-ray imaging\n(B) MRI\n(C) Double-labeling immunofluorescence\n(D) CT scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.984375, "loss": 4.106487274169922 }, { "id": "identity_41448", "image": "FigureQA/images/7786.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the proportion of the Medium Purple section in the pie chart?" }, { "from": "gpt", "value": "The answer is 2/3 or approximately 66.67%" } ], "clip_score": 38.625, "loss": 1.2642347812652588 }, { "id": "identity_247545", "image": "Super-CLEVR/images/superCLEVR_new_000890.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many things are either brown things on the left side of the rubber road bike or small yellow things, and how many things are either red things on the right side of the rubber road bike or large purple things?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.34375, "loss": 2.438037157058716 }, { "id": "identity_190075", "image": "A-OKVQA/images/516239.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the license plate number of the truck?" }, { "from": "gpt", "value": "The answer is UP36E6877" } ], "clip_score": 18.953125, "loss": 3.4280431270599365 }, { "id": "identity_234244", "image": "VQA2.0/images/238982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are there in the image, and how many of them are interacting with the horse?" }, { "from": "gpt", "value": "The answer is 11 people in total, 1 interacting with the horse" } ], "clip_score": 22.984375, "loss": 2.059673309326172 }, { "id": "identity_110310", "image": "VizWiz/images/VizWiz_train_00012707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the item in the image?" }, { "from": "gpt", "value": "The answer is 1 meter by 1 meter" } ], "clip_score": 13.2421875, "loss": 2.9477736949920654 }, { "id": "identity_6882", "image": "ScienceQA/images/15409.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these magnets attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.96875, "loss": 3.453312635421753 }, { "id": "identity_205383", "image": "Super-CLEVR/images/superCLEVR_new_000245.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object that is in front of the large pink object?" }, { "from": "gpt", "value": "The answer is Irregular" } ], "clip_score": 12.0, "loss": 5.234525680541992 }, { "id": "identity_11918", "image": "DocVQA/images/skcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Chairman of Committee On Health Care Of The Poor?" }, { "from": "gpt", "value": "The answer is Robert C. Long, MD" } ], "clip_score": 22.421875, "loss": 2.450727939605713 }, { "id": "identity_333697", "image": "VQA2.0/images/458172.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this usable?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.4140625, "loss": 2.8710625171661377 }, { "id": "identity_228415", "image": "Geometry3K/images/315.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given that the triangles are similar, find the length of side \\( QS \\)" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 35.125, "loss": 2.9067256450653076 }, { "id": "identity_71778", "image": "ScienceQA/images/19469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how many degrees Celsius did the temperature increase in the greenhouse with solar panels?" }, { "from": "gpt", "value": "The answer is 10°C" } ], "clip_score": 26.640625, "loss": 1.8875573873519897 }, { "id": "identity_135819", "image": "DocVQA/images/yhgk0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: To whom is the letter addressed?" }, { "from": "gpt", "value": "The answer is Dr. O. E. May" } ], "clip_score": 21.546875, "loss": 2.8672738075256348 }, { "id": "identity_184121", "image": "A-OKVQA/images/196104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the advertisement on the fence behind the batter?" }, { "from": "gpt", "value": "The answer is It is not legible" } ], "clip_score": 23.09375, "loss": 3.4534997940063477 }, { "id": "identity_147872", "image": "DocVQA/images/knmc0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the Dean of the School of Medicine at University of California, Davis?" }, { "from": "gpt", "value": "The answer is C. J. Tupper" } ], "clip_score": 27.0, "loss": 2.75516939163208 }, { "id": "identity_147329", "image": "MapQA/images/map_12426.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of doors in California?" }, { "from": "gpt", "value": "The answer is 50-56" } ], "clip_score": 29.484375, "loss": 2.297940492630005 }, { "id": "identity_60552", "image": "DocVQA/images/snyc0227_139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mean Albumin/Globulin ratio for pregnant females aged 45+?" }, { "from": "gpt", "value": "The answer is 1.0" } ], "clip_score": 22.3125, "loss": 1.772216558456421 }, { "id": "identity_228162", "image": "FigureQA/images/12275.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: By how much is Olive Drab greater than Navy Blue?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 22.203125, "loss": 1.8273236751556396 }, { "id": "identity_46142", "image": "DVQA/images/bar_train_00076367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the least number of people preferring 'honey'?" }, { "from": "gpt", "value": "The answer is closet" } ], "clip_score": 35.28125, "loss": 3.3456075191497803 }, { "id": "identity_60493", "image": "DocVQA/images/pslc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what time will the council committees meet?" }, { "from": "gpt", "value": "The answer is from 8:00 A.M. until 9:30 A.M" } ], "clip_score": 22.328125, "loss": 1.1208157539367676 }, { "id": "identity_257807", "image": "TextVQA/images/2910c9f42a440563.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what are the names of all sponsors visible on the scoreboard?" }, { "from": "gpt", "value": "The answer is Pepsi, United, Sharp, Verizon, Budweiser" } ], "clip_score": 31.84375, "loss": 1.8960113525390625 }, { "id": "identity_55562", "image": "FigureQA/images/92015.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points are there for the color Sienna?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 25.3125, "loss": 2.7368533611297607 }, { "id": "identity_280344", "image": "UniGeo/images/2511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, points A, B, and C lie on circle O, with angle OAB measuring 22.5°. What is the measure of angle ACB in degrees?\nChoices:\n(A) 11.5°\n(B) 112.5°\n(C) 122.5°\n(D) 135°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.625, "loss": 2.6950905323028564 }, { "id": "identity_314042", "image": "A-OKVQA/images/524255.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What entertainment do these individuals have to amuse them?\nChoices:\n(A) opera\n(B) tv\n(C) horseback tricks\n(D) singing" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.203125, "loss": 4.319945335388184 }, { "id": "identity_111239", "image": "Super-CLEVR/images/superCLEVR_new_001259.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned directly between the tandem bike and the scooter?" }, { "from": "gpt", "value": "The answer is A motorcycle" } ], "clip_score": 25.625, "loss": 3.7375521659851074 }, { "id": "identity_266670", "image": "Geometry3K/images/158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the measure of angle Y.\nOptions:\n(A) 18\n(B) 36\n(C) 72\n(D) 144" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.03125, "loss": 2.9226057529449463 }, { "id": "identity_254206", "image": "TextVQA/images/172813812931e912.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different comic book titles are displayed on the middle shelf, and which characters are featured on the covers of these comics?" }, { "from": "gpt", "value": "The answer is Six titles; characters are Zombie, Catwoman, Iron Man, Hulk, Simpsons, and Boba Fett" } ], "clip_score": 28.890625, "loss": 2.834643840789795 }, { "id": "identity_255286", "image": "TextVQA/images/500c81a0419347d3.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the digits on the jersey number and the number of players visible in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 15.46875, "loss": 2.596910238265991 }, { "id": "identity_203415", "image": "A-OKVQA/images/22222.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pedestrian crossing lines are fully visible on the street?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 21.09375, "loss": 2.3861637115478516 }, { "id": "identity_120033", "image": "VQA-AS/images/1679.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the tree leaves?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 18.15625, "loss": 3.3521246910095215 }, { "id": "identity_259244", "image": "PlotQA/images/151364.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average percentage of the economically active population across all years?" }, { "from": "gpt", "value": "The answer is 60.0" } ], "clip_score": 22.828125, "loss": 2.217001438140869 }, { "id": "identity_165853", "image": "Geometry3K/images/95.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ACB?" }, { "from": "gpt", "value": "The answer is 95°" } ], "clip_score": 33.0, "loss": 2.688652515411377 }, { "id": "identity_208843", "image": "A-OKVQA/images/305901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of transportation is partially visible behind the person holding the phone?" }, { "from": "gpt", "value": "The answer is Bicycle" } ], "clip_score": 28.296875, "loss": 2.1733715534210205 }, { "id": "identity_298837", "image": "A-OKVQA/images/565264.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the reason for placing some bottles horizontally?\nChoices:\n(A) for mixing\n(B) moisten cork\n(C) keep warm\n(D) less space" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.96875, "loss": 4.108570098876953 }, { "id": "identity_264490", "image": "AI2D/images/3516.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, which component is represented by A?\nChoices:\n(A) cell wall\n(B) central vacuole\n(C) microtubule\n(D) plasmadesma" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.859375, "loss": 4.2735490798950195 }, { "id": "identity_304888", "image": "DocVQA/images/nlcn0226_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who are the primary contributors to pollution?" }, { "from": "gpt", "value": "The answer is Department of defence and its military contractors" } ], "clip_score": 18.96875, "loss": 3.8347537517547607 }, { "id": "identity_249151", "image": "VQA2.0/images/197561.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different types of items are attached to the fridge, and what are they?" }, { "from": "gpt", "value": "The answer is three types: magnets, stickers, and papers" } ], "clip_score": 27.03125, "loss": 2.286907911300659 }, { "id": "identity_254281", "image": "UniGeo/images/2499.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,AB是⊙O的直径,C、D是圆上两点,∠CBA=70°,若∠CAD=30°,则∠D的度数为多少?" }, { "from": "gpt", "value": "The answer is 40°" } ], "clip_score": 31.109375, "loss": 1.9305860996246338 }, { "id": "identity_14563", "image": "UniGeo/images/384.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,C、D是以线段AB为直径的⊙O上两点,若AC=CD,且∠ACD=50°,则∠BAC的度数为()\nChoices:\n(A) 20°\n(B) 35°\n(C) 25°\n(D) 30°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.203125, "loss": 2.6070239543914795 }, { "id": "identity_301350", "image": "ScienceQA/images/14763.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of these organisms contains matter that was once part of this?\nChoices:\n(A) orca\n(B) zooplankton" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.234375, "loss": 3.777066707611084 }, { "id": "identity_78030", "image": "AI2D/images/3712.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the final part of the digestive system before waste is expelled?" }, { "from": "gpt", "value": "The answer is anus" } ], "clip_score": 30.953125, "loss": 2.937300443649292 }, { "id": "identity_119713", "image": "PMC-VQA/images/PMC5234261_Fig7_113992.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is highlighted in green in this image?" }, { "from": "gpt", "value": "The answer is Prostate gland" } ], "clip_score": 27.015625, "loss": 3.0605149269104004 }, { "id": "identity_273171", "image": "TabMWP/images/24188.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the function shown in the table linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.203125, "loss": 4.468297958374023 }, { "id": "identity_175822", "image": "TabMWP/images/37414.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of cookies eaten by all customers last week?" }, { "from": "gpt", "value": "The answer is 255 cookies" } ], "clip_score": 36.5, "loss": 2.200719118118286 }, { "id": "identity_182669", "image": "PMC-VQA/images/PMC6215415_BIO035386F6_390991.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the MRI image taken in a sagittal, axial, or coronal plane?" }, { "from": "gpt", "value": "The answer is Coronal plane" } ], "clip_score": 34.125, "loss": 2.557871103286743 }, { "id": "identity_72335", "image": "AI2D/images/4332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which type of vegetable is represented by both a purple and a green variety in the image?" }, { "from": "gpt", "value": "The answer is Cabbage and Lettuces" } ], "clip_score": 25.96875, "loss": 2.4634106159210205 }, { "id": "identity_66495", "image": "AI2D/images/649.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total radius of the Earth as labeled in the diagram?" }, { "from": "gpt", "value": "The answer is 6450 km" } ], "clip_score": 31.203125, "loss": 1.9653030633926392 }, { "id": "identity_58592", "image": "PlotQA/images/100738.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the trend of DPT vaccinations increasing or decreasing over the years?" }, { "from": "gpt", "value": "The answer is Increasing" } ], "clip_score": 26.8125, "loss": 2.6827943325042725 }, { "id": "identity_84414", "image": "Geometry3K/images/207.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the measure of angle AED" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 30.828125, "loss": 3.4382402896881104 }, { "id": "identity_244792", "image": "A-OKVQA/images/442630.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify three features in the image that indicate the area is suburban, and explain how each feature supports this classification" }, { "from": "gpt", "value": "The answer is Residential buildings, sidewalk and street layout, open spaces and trees" } ], "clip_score": 27.03125, "loss": 3.448056697845459 }, { "id": "identity_269676", "image": "Geometry3K/images/549.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If A B is perpendicular to B C, what is the measure of angle 6?\nChoices:\n(A) 21\n(B) 26\n(C) 31\n(D) 36" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.125, "loss": 2.7566115856170654 }, { "id": "identity_112147", "image": "A-OKVQA/images/137748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the car in the foreground?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 14.9765625, "loss": 3.0877175331115723 }, { "id": "identity_104717", "image": "PMC-VQA/images/PMC8601058_fig-3_115220.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was likely used to capture this image?" }, { "from": "gpt", "value": "The answer is Scanning electron microscopy (SEM)" } ], "clip_score": 34.0625, "loss": 1.3090999126434326 }, { "id": "identity_39444", "image": "FigureQA/images/81854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is Aqua's value ever above 90?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 23.875, "loss": 3.0649027824401855 }, { "id": "identity_195279", "image": "PMC-VQA/images/PMC6837801_F0002_64083.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the intensity of the cerebrospinal fluid (CSF) in the image?" }, { "from": "gpt", "value": "The answer is Hypointense (dark)" } ], "clip_score": 31.390625, "loss": 3.0202813148498535 }, { "id": "identity_253238", "image": "Super-CLEVR/images/superCLEVR_new_000820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the relative size of the metallic thing in front of the metal thing behind the gray metallic bicycle compared to the yellow vehicle?" }, { "from": "gpt", "value": "The answer is much smaller" } ], "clip_score": 28.90625, "loss": 3.2933995723724365 }, { "id": "identity_266303", "image": "TQA/images/abc_question_images/skin_cross_section_17583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location can the highest amount of adipose tissue be found?\nChoices:\n(A) L\n(B) P\n(C) K\n(D) M" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.453125, "loss": 4.092408657073975 }, { "id": "identity_8520", "image": "A-OKVQA/images/296544.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What main dish is served here?\nChoices:\n(A) flower soup\n(B) pizza\n(C) flower cake\n(D) meat stew" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 21.515625, "loss": 4.377851486206055 }, { "id": "identity_95736", "image": "IconQA/images/18849.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more paper clips would you need to make a chain that is twice as long as the line in the image?" }, { "from": "gpt", "value": "The answer is 3 paper clips fit along the line, so you would need 3 x 2 = 6 paper clips in total. There are 5 paper clips shown, so you would need 6 - 5 = 1 more paper clip" } ], "clip_score": 31.15625, "loss": 1.2818886041641235 }, { "id": "identity_185421", "image": "VQA-AS/images/18896.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many items are on the picnic blanket?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 19.359375, "loss": 2.6753127574920654 }, { "id": "identity_161002", "image": "GeoQA+/images/3720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle AEF if angle ABD is 60 degrees?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 31.703125, "loss": 1.754704475402832 }, { "id": "identity_41889", "image": "FigureQA/images/46212.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value represented by the Dim Gray bar?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 25.65625, "loss": 2.6573119163513184 }, { "id": "identity_334438", "image": "DocVQA/images/xnlc0228_53.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is the \"Finlay Institute\" located?" }, { "from": "gpt", "value": "The answer is CUBA" } ], "clip_score": 15.3984375, "loss": 2.9221301078796387 }, { "id": "identity_231297", "image": "UniGeo/images/901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,BC是⊙O的弦,OA⊥BC,∠AOB=70°,若AD是⊙O的切线,则∠ADC的度数是多少?" }, { "from": "gpt", "value": "The answer is 27.5°" } ], "clip_score": 32.0625, "loss": 1.8861067295074463 }, { "id": "identity_251281", "image": "ScienceQA/images/5028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the left magnet is rotated 180 degrees around its vertical axis, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is (B) attract" } ], "clip_score": 24.578125, "loss": 3.051910877227783 }, { "id": "identity_282798", "image": "CLEVR-Math/images/CLEVR_train_035347.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After subtracting all the gray shiny blocks and all the large gray metallic objects, how many objects remain?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 30.140625, "loss": 2.295832395553589 }, { "id": "identity_191220", "image": "PMC-VQA/images/PMC5797361_Fig1_271410.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of medical imaging method was used to capture this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 31.3125, "loss": 2.7574989795684814 }, { "id": "identity_10291", "image": "Super-CLEVR/images/superCLEVR_new_001263.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number of large blue things are made of the same material as the tiny truck?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 27.1875, "loss": 2.0696818828582764 }, { "id": "identity_246210", "image": "TabMWP/images/14152.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Whitney need to buy 2 cell phones, 1 big screen TV, and 3 MP3 players?" }, { "from": "gpt", "value": "The answer is 2902" } ], "clip_score": 26.09375, "loss": 2.8695597648620605 }, { "id": "identity_279521", "image": "VQA2.0/images/43001.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a lot of traffic on the road?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 25.46875, "loss": 3.057908296585083 }, { "id": "identity_53970", "image": "DocVQA/images/gxxh0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of men initially recruited across all centers?" }, { "from": "gpt", "value": "The answer is 545" } ], "clip_score": 18.15625, "loss": 2.392247200012207 }, { "id": "identity_15421", "image": "VizWiz/images/VizWiz_train_00011085.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is it?" }, { "from": "gpt", "value": "The answer is refrigerator" } ], "clip_score": 26.40625, "loss": 1.7417634725570679 }, { "id": "identity_2892", "image": "GeoQA+/images/3782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AD是∠CAE的平分线,∠B=45°,∠DAE=75°,则∠ACD为()\nChoices:\n(A) 105°\n(B) 85°\n(C) 60°\n(D) 75°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.125, "loss": 2.99409818649292 }, { "id": "identity_43776", "image": "MapQA/images/map_3824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage value for the state of California?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 24.265625, "loss": 2.7310636043548584 }, { "id": "identity_320346", "image": "FigureQA/images/42051.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this the high median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.640625, "loss": 4.4274582862854 }, { "id": "identity_272477", "image": "AI2D/images/35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which letter on the diagram corresponds to the egg stage? \n(A) B\n(B) D\n(C) C\n(D) A" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 16.78125, "loss": 3.4796226024627686 }, { "id": "identity_33074", "image": "ScienceQA/images/14237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probability that a bitter melon plant produced by this cross will have light brown seeds?\nChoices:\n(A) 2/4\n(B) 4/4\n(C) 1/4\n(D) 3/4\n(E) 0/4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 24.15625, "loss": 3.3685221672058105 }, { "id": "identity_250043", "image": "TextVQA/images/17d2127854340e4f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the brand of the monitor and the brand of the phone shown in the image?" }, { "from": "gpt", "value": "The answer is Dell monitor and Apple phone" } ], "clip_score": 34.96875, "loss": 2.3184962272644043 }, { "id": "identity_231514", "image": "Geometry3K/images/1362.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the perimeter of the triangle. Round to the nearest tenth" }, { "from": "gpt", "value": "The answer is 180.0" } ], "clip_score": 33.15625, "loss": 2.308607339859009 }, { "id": "identity_297552", "image": "DocVQA/images/kqdd0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Where is Broadus N. Buttler located?" }, { "from": "gpt", "value": "The answer is president" } ], "clip_score": 17.796875, "loss": 4.894017219543457 }, { "id": "identity_254503", "image": "A-OKVQA/images/524044.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the purpose of the toilet placed in this location, considering the surrounding environment and the activities of the people present?" }, { "from": "gpt", "value": "The answer is Part of an art installation or public demonstration" } ], "clip_score": 25.671875, "loss": 2.814412832260132 }, { "id": "identity_186822", "image": "TextVQA/images/6a2f00b32601f7e7.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the liquid in the dark bottle?" }, { "from": "gpt", "value": "The answer is Dark brown or black" } ], "clip_score": 23.140625, "loss": 3.1085901260375977 }, { "id": "identity_110342", "image": "TextVQA/images/043a59888f5bcb7b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any detergent boxes that are the same size as the 'Sun' detergent box?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 26.34375, "loss": 3.2088840007781982 }, { "id": "identity_304694", "image": "VQA-RAD/images/synpic27597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do these areas show calcification?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.71875, "loss": 3.4212646484375 }, { "id": "identity_231835", "image": "TextVQA/images/64291d1eff2aaa10.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the current score difference between the two teams, and how much time has elapsed in the first quarter?" }, { "from": "gpt", "value": "The answer is 5 points, 1 minute and 52 seconds" } ], "clip_score": 20.578125, "loss": 2.3642544746398926 }, { "id": "identity_258810", "image": "ScienceQA/images/20869.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the aquatic animal that is not a fish from the following options: \n(A) woodpecker\n(B) green moray eel\n(C) penguin\n(D) fire salamander" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.078125, "loss": 3.693995237350464 }, { "id": "identity_264969", "image": "VizWiz/images/VizWiz_train_00001572.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which brand of pocket knife is this?" }, { "from": "gpt", "value": "The answer is unanswerable" } ], "clip_score": 28.734375, "loss": 4.163174152374268 }, { "id": "identity_4840", "image": "VizWiz/images/VizWiz_train_00014425.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is it?" }, { "from": "gpt", "value": "The answer is violin case" } ], "clip_score": 35.65625, "loss": 2.5856518745422363 }, { "id": "identity_242246", "image": "FigureQA/images/75264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of the y-axis for the Dark Seafoam and Violet data points?" }, { "from": "gpt", "value": "The answer is Dark Seafoam: 20, Violet: 35" } ], "clip_score": 27.21875, "loss": 1.4794232845306396 }, { "id": "identity_333162", "image": "ChartQA/images/2940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest value in the brown bar?" }, { "from": "gpt", "value": "The answer is 52" } ], "clip_score": 18.0, "loss": 2.904710531234741 }, { "id": "identity_52381", "image": "FigureQA/images/52478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of the Crimson section in the pie chart?" }, { "from": "gpt", "value": "The answer is 12.5%" } ], "clip_score": 33.84375, "loss": 1.315500259399414 }, { "id": "identity_299192", "image": "PMC-VQA/images/PMC7317310_fig1_199546.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of stain was utilized to observe the lung sections among the following options: (A) Fluorescent stain, (B) HE stain, (C) Iodine stain, or (D) Gram stain?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.53125, "loss": 4.743011951446533 }, { "id": "identity_145209", "image": "DVQA/images/bar_train_00090797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total value for the category \"left\"?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 20.453125, "loss": 2.641026258468628 }, { "id": "identity_181120", "image": "IconQA/images/105866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many shapes are in the image in total?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 26.234375, "loss": 2.235560417175293 }, { "id": "identity_74651", "image": "ScienceQA/images/12295.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the body of water located to the northeast of the orange dot?" }, { "from": "gpt", "value": "The answer is Lake Erie" } ], "clip_score": 29.84375, "loss": 3.58949613571167 }, { "id": "identity_67364", "image": "AI2D/images/3247.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure indicated by the label \"Rough endoplasmic reticulum\"?" }, { "from": "gpt", "value": "The answer is Rough endoplasmic reticulum" } ], "clip_score": 32.09375, "loss": 1.0004122257232666 }, { "id": "identity_33518", "image": "A-OKVQA/images/561037.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What kind of organization is the white square sticker featuring in the motorcycle?\nChoices:\n(A) motors club\n(B) hospital\n(C) insurance\n(D) bank" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.328125, "loss": 4.328670024871826 }, { "id": "identity_50054", "image": "FigureQA/images/87469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what point on the x-axis does the Orange line intersect with the Periwinkle line?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 23.75, "loss": 1.975315809249878 }, { "id": "identity_5801", "image": "DVQA/images/bar_train_00181695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars horizontal?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 21.59375, "loss": 3.8494181632995605 }, { "id": "identity_139027", "image": "FigureQA/images/79099.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of x-axis values for Dark Seafoam?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 29.515625, "loss": 3.01326322555542 }, { "id": "identity_170790", "image": "TabMWP/images/23066.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Ethan buys one of each item, how much money does he spend in total?" }, { "from": "gpt", "value": "The answer is $4.55" } ], "clip_score": 26.5, "loss": 2.219683885574341 }, { "id": "identity_285267", "image": "TextVQA/images/c5227a830da85f77.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the author?" }, { "from": "gpt", "value": "The answer is jutta profijt" } ], "clip_score": 15.125, "loss": 2.9079275131225586 }, { "id": "identity_239265", "image": "VQA2.0/images/308796.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many people are holding roller suitcases in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.921875, "loss": 2.1376800537109375 }, { "id": "identity_9600", "image": "ScienceQA/images/19553.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these magnets attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.515625, "loss": 3.7431023120880127 }, { "id": "identity_106468", "image": "TextVQA/images/8bddcf083641beeb.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the mobile device's back cover textured or smooth?" }, { "from": "gpt", "value": "The answer is Textured" } ], "clip_score": 24.65625, "loss": 2.7032601833343506 }, { "id": "identity_113962", "image": "TextVQA/images/02d94afde962b96b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brands of cell phone carriers are advertised on the storefront?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 24.0, "loss": 2.208707571029663 }, { "id": "identity_34648", "image": "FigureQA/images/91303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Green have the maximum area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 17.90625, "loss": 4.081444263458252 }, { "id": "identity_200533", "image": "PMC-VQA/images/PMC7519687_Fig3_261740.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the arrow pointing to the shortest peak?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 22.484375, "loss": 3.830230712890625 }, { "id": "identity_140660", "image": "DVQA/images/bar_train_00139303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern of the symbols used in the legend?" }, { "from": "gpt", "value": "The answer is King: circle, March: cross, Square: star" } ], "clip_score": 23.796875, "loss": 2.7964487075805664 }, { "id": "identity_265272", "image": "DocVQA/images/ypvw0217_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the estimated budget for stage-setting for new product categories and support for low dose Prempro?" }, { "from": "gpt", "value": "The answer is $150,000.00" } ], "clip_score": 26.078125, "loss": 1.435365080833435 }, { "id": "identity_2466", "image": "IconQA/images/105972.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is shown? Answer by typing a time word, not a number. It is (_) past nine." }, { "from": "gpt", "value": "The answer is half" } ], "clip_score": 25.375, "loss": 5.280067443847656 }, { "id": "identity_266337", "image": "DVQA/images/bar_train_00046286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the biggest bar?" }, { "from": "gpt", "value": "The answer is 70" } ], "clip_score": 25.046875, "loss": 2.6303045749664307 }, { "id": "identity_109797", "image": "VQA-AS/images/13180.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of ball is on the ground?" }, { "from": "gpt", "value": "The answer is American football" } ], "clip_score": 23.15625, "loss": 2.795405149459839 }, { "id": "identity_294323", "image": "PMC-VQA/images/PMC6119949_brainsci-08-00145-f001_362149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific type of magnetic resonance images were captured for Patient 1?\nChoices:\n(A) T1-weighted axial and T1-weighted midline sagittal brain images \n(B) T2-weighted axial and T1-weighted midline sagittal brain images \n(C) T2-weighted axial and T2-weighted midline sagittal brain images \n(D) T1-weighted axial and T2-weighted midline sagittal brain images" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 34.59375, "loss": 3.2334868907928467 }, { "id": "identity_221954", "image": "A-OKVQA/images/447089.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are visible holding signs in the image?" }, { "from": "gpt", "value": "The answer is Four" } ], "clip_score": 17.265625, "loss": 4.6597065925598145 }, { "id": "identity_283136", "image": "A-OKVQA/images/541493.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following items is connected to the outlet?\n(A) nightlight\n(B) hair dryer\n(C) phone\n(D) electric toothbrush" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.125, "loss": 4.202315807342529 }, { "id": "identity_195414", "image": "PMC-VQA/images/PMC7107010_F1_143306.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any visible abnormalities in the ventricular system?" }, { "from": "gpt", "value": "The answer is No visible abnormalities" } ], "clip_score": 19.625, "loss": 2.89078426361084 }, { "id": "identity_323505", "image": "UniGeo/images/335.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,若∠AOC=70°,且AD∥OC,则∠AOD的度数是多少?\n选项:\n(A) 70°\n(B) 60°\n(C) 50°\n(D) 40°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.609375, "loss": 2.539494037628174 }, { "id": "identity_229195", "image": "ScienceQA/images/6986.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the left magnet is rotated 180 degrees around its vertical axis, will the magnets attract or repel each other?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 22.015625, "loss": 3.03412127494812 }, { "id": "identity_141057", "image": "DVQA/images/bar_train_00011608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest cumulative value of all elements combined?" }, { "from": "gpt", "value": "The answer is event" } ], "clip_score": 26.53125, "loss": 3.232574939727783 }, { "id": "identity_308231", "image": "IconQA/images/13774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The clocks show two things someone did Wednesday after lunch. Which did they do second?\nChoices:\n(A) played soccer\n(B) talked on the phone" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 35.21875, "loss": 3.716341495513916 }, { "id": "identity_216517", "image": "A-OKVQA/images/238385.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any vehicles visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 16.25, "loss": 3.303272247314453 }, { "id": "identity_132992", "image": "VQA2.0/images/448656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the carpet on the ground?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 13.328125, "loss": 3.306170701980591 }, { "id": "identity_36074", "image": "PMC-VQA/images/PMC6787576_diagnostics-09-00068-f002_49386.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the structure indicated by the arrow?\nChoices:\n(A) Chromatin clump \n(B) Nuclear pore \n(C) Mitochondria \n(D) Brush border" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 20.765625, "loss": 4.357096195220947 }, { "id": "identity_270147", "image": "DocVQA/images/ylvw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which project has the scheduled release date set for the third quarter of 2002?" }, { "from": "gpt", "value": "The answer is Totelle CD-ROM 1 and 2mg" } ], "clip_score": 20.96875, "loss": 4.647124767303467 }, { "id": "identity_98851", "image": "TabMWP/images/15241.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median number of red buttons per bag?" }, { "from": "gpt", "value": "The answer is 59" } ], "clip_score": 23.25, "loss": 3.0079710483551025 }, { "id": "identity_209949", "image": "A-OKVQA/images/109450.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the destination displayed on the front of the bus?" }, { "from": "gpt", "value": "The answer is Archer" } ], "clip_score": 24.21875, "loss": 2.6863467693328857 }, { "id": "identity_252372", "image": "AI2D/images/821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If all butterflies die, what will be the subsequent effect on the population of fish and frogs, and how will this impact the bird population?" }, { "from": "gpt", "value": "The answer is Fish and frog populations increase, bird population decreases" } ], "clip_score": 35.1875, "loss": 2.7995147705078125 }, { "id": "identity_138454", "image": "DVQA/images/bar_train_00096324.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the 'ladder' category have a bar with a value greater than 5?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.5, "loss": 3.2436294555664062 }, { "id": "identity_117143", "image": "VQA2.0/images/563999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lemon halves can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.703125, "loss": 4.393246173858643 }, { "id": "identity_196433", "image": "PMC-VQA/images/PMC7564854_F0003_275748.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label at the bottom of the image referring to?" }, { "from": "gpt", "value": "The answer is Supraspinatus muscle" } ], "clip_score": 31.640625, "loss": 2.5437047481536865 }, { "id": "identity_337797", "image": "A-OKVQA/images/464237.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is under these and to the left?\nChoices:\n(A) apple\n(B) cardboard boxes\n(C) bicycle\n(D) scarecrow" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.1875, "loss": 4.314013481140137 }, { "id": "identity_295221", "image": "IconQA/images/100136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of circles present?" }, { "from": "gpt", "value": "The answer is 92" } ], "clip_score": 27.0625, "loss": 2.211347818374634 }, { "id": "identity_240565", "image": "TQA/images/question_images/optics_ray_diagrams_9173.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of rays that either pass through the lens without refraction or converge at the focal points after refraction?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 30.96875, "loss": 2.1211555004119873 }, { "id": "identity_270692", "image": "DocVQA/images/lycd0227_16.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is referenced in the document?" }, { "from": "gpt", "value": "The answer is January 1970" } ], "clip_score": 23.515625, "loss": 2.4004619121551514 }, { "id": "identity_108483", "image": "VQA2.0/images/71128.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the animal alone?" }, { "from": "gpt", "value": "The answer is Yes, the animal is alone" } ], "clip_score": 16.875, "loss": 2.3123879432678223 }, { "id": "identity_173422", "image": "IconQA/images/50372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the shapes are located in the bottom row?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 31.234375, "loss": 1.5814013481140137 }, { "id": "identity_175190", "image": "IconQA/images/77562.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the clock is 45 minutes fast, what is the actual time?" }, { "from": "gpt", "value": "The answer is 2:15 PM" } ], "clip_score": 32.09375, "loss": 1.8648325204849243 }, { "id": "identity_207782", "image": "PMC-VQA/images/PMC8165677_Fig1_456707.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality is shown in the image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 31.171875, "loss": 2.942622423171997 }, { "id": "identity_93564", "image": "IconQA/images/85511.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of yellow squares in the top row to the total number of squares?" }, { "from": "gpt", "value": "The answer is 5:8" } ], "clip_score": 32.875, "loss": 1.8757983446121216 }, { "id": "identity_190095", "image": "Super-CLEVR/images/superCLEVR_new_001594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the two cars identical in model?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 24.609375, "loss": 3.1523382663726807 }, { "id": "identity_151709", "image": "ScienceQA/images/3320.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state that is northeast of California?" }, { "from": "gpt", "value": "The answer is Nevada" } ], "clip_score": 28.796875, "loss": 2.742844343185425 }, { "id": "identity_283744", "image": "MapQA/images/map_3519.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of California higher than that of New Jersey?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.625, "loss": 3.21410870552063 }, { "id": "identity_31332", "image": "TabMWP/images/663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: A reporter counted the number of masks local artists made for this year's masquerade ball. How many artists made fewer than 3 masks?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 29.65625, "loss": 1.8018966913223267 }, { "id": "identity_28403", "image": "Geometry3K/images/1699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find \\angle 7.\nChoices:\n(A) 41\n(B) 82\n(C) 98\n(D) 186" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.078125, "loss": 3.1289279460906982 }, { "id": "identity_72514", "image": "TQA/images/abc_question_images/parts_telescope_17286.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the component labeled 'X'?" }, { "from": "gpt", "value": "The answer is Secondary Mirror or Lens" } ], "clip_score": 33.15625, "loss": 3.8040502071380615 }, { "id": "identity_138183", "image": "DocVQA/images/rrcw0217_18.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the company mentioned at the bottom of the document?" }, { "from": "gpt", "value": "The answer is 189 Wall Street, Princeton, New Jersey 08540" } ], "clip_score": 25.0625, "loss": 1.4237205982208252 }, { "id": "identity_215355", "image": "A-OKVQA/images/218701.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many trees are visible in the background?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 11.4140625, "loss": 4.442111968994141 }, { "id": "identity_212828", "image": "PMC-VQA/images/PMC6955386_Fig2_98357.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the gray matter appear normal in this MRI image?" }, { "from": "gpt", "value": "The answer is Gray matter appears normal" } ], "clip_score": 34.59375, "loss": 3.547154664993286 }, { "id": "identity_246621", "image": "DocVQA/images/hxpp0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address and contact number of the organization where Richard M. Powell works?" }, { "from": "gpt", "value": "The answer is 7315 Wisconsin Avenue, Washington, D.C. 20014, (301) 652-5674" } ], "clip_score": 26.28125, "loss": 1.4559481143951416 }, { "id": "identity_246823", "image": "DocVQA/images/kfhd0227_15.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the member of the Medical School Admissions Committee and also holds a directorial position?" }, { "from": "gpt", "value": "The answer is Dr. Marvin Fishman and Dr. Gerald Morris" } ], "clip_score": 25.109375, "loss": 2.5393335819244385 }, { "id": "identity_70048", "image": "TQA/images/abc_question_images/human_system_circulatory_11381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label indicates the heart?" }, { "from": "gpt", "value": "The answer is H" } ], "clip_score": 29.609375, "loss": 2.692171096801758 }, { "id": "identity_188920", "image": "PMC-VQA/images/PMC4331293_Fig5_359188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What lobe of the brain is primarily involved in the image?" }, { "from": "gpt", "value": "The answer is Temporal lobe" } ], "clip_score": 28.234375, "loss": 2.026444435119629 }, { "id": "identity_182187", "image": "VQA2.0/images/341075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many giraffes are standing in the image?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 28.28125, "loss": 2.2478229999542236 }, { "id": "identity_94125", "image": "IconQA/images/80299.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one bird flies away, how many birds will remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 22.078125, "loss": 2.3829665184020996 }, { "id": "identity_71499", "image": "AI2D/images/1453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phase of the moon during the spring tides shown in this image?" }, { "from": "gpt", "value": "The answer is Full moon and new moon" } ], "clip_score": 28.84375, "loss": 2.975811004638672 }, { "id": "identity_34043", "image": "DocVQA/images/zjlk0226_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: who is the editor of The Coloradon?" }, { "from": "gpt", "value": "The answer is Ms. Barbara Allbrandt" } ], "clip_score": 23.0, "loss": 3.447068929672241 }, { "id": "identity_14213", "image": "CLEVR-Math/images/CLEVR_train_038234.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract 0 cyan blocks. How many objects are left?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 20.703125, "loss": 2.2711470127105713 }, { "id": "identity_190861", "image": "PMC-VQA/images/PMC2644319_F2_34592.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of hemorrhage associated with the mass in this CT image?" }, { "from": "gpt", "value": "The answer is No evidence of hemorrhage associated with the mass" } ], "clip_score": 33.78125, "loss": 1.9567967653274536 }, { "id": "identity_273754", "image": "GeoQA+/images/4547.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, AB is the diameter of circle O, and C and D are two points on circle O. If ∠CDB = 20°, what is the degree measure of ∠ABC?\nChoices:\n(A) 60°\n(B) 65°\n(C) 70°\n(D) 75°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.90625, "loss": 2.7612318992614746 }, { "id": "identity_130492", "image": "TextVQA/images/0f2e50dac6fcb058.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any adults playing on the field?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.75, "loss": 3.116851568222046 }, { "id": "identity_337635", "image": "VizWiz/images/VizWiz_train_00019307.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is this?" }, { "from": "gpt", "value": "The answer is bottle" } ], "clip_score": 21.15625, "loss": 2.6464431285858154 }, { "id": "identity_118912", "image": "TextVQA/images/c3bbbfde4e516c27.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the phone number displayed on the billboard?" }, { "from": "gpt", "value": "The answer is 0800 587 6 587" } ], "clip_score": 25.9375, "loss": 1.7527341842651367 }, { "id": "identity_223791", "image": "Super-CLEVR/images/superCLEVR_new_001551.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there an object in the image that is larger than the green bicycle but smaller than the red airplane?" }, { "from": "gpt", "value": "The answer is Yes, the blue car" } ], "clip_score": 34.75, "loss": 3.1587846279144287 }, { "id": "identity_59122", "image": "FigureQA/images/66797.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the minimum value of Purple and at which x-axis label does it occur?" }, { "from": "gpt", "value": "The answer is Minimum value is approximately 99.2 at x-axis label 60" } ], "clip_score": 25.234375, "loss": 2.305725574493408 }, { "id": "identity_106150", "image": "VQA-RAD/images/synpic34836.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to produce this image?" }, { "from": "gpt", "value": "The answer is MRI scan" } ], "clip_score": 32.03125, "loss": 2.6948046684265137 }, { "id": "identity_111324", "image": "TextVQA/images/d8227cdf6d2fb9c5.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the tax seal on the bottle neck?" }, { "from": "gpt", "value": "The answer is Green and white" } ], "clip_score": 22.078125, "loss": 3.5993597507476807 }, { "id": "identity_189793", "image": "A-OKVQA/images/92136.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the swimming pool area?" }, { "from": "gpt", "value": "The answer is An integer number based on the visible count" } ], "clip_score": 15.75, "loss": 4.048472881317139 }, { "id": "identity_89646", "image": "UniGeo/images/462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of the circle with center O and CD is a chord equal to AD, what is the perimeter of triangle ACD?" }, { "from": "gpt", "value": "The answer is 2r + CD" } ], "clip_score": 37.84375, "loss": 2.647444248199463 }, { "id": "identity_62280", "image": "DocVQA/images/jzhd0227_36.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the concern mentioned in section IV regarding resources?" }, { "from": "gpt", "value": "The answer is limited and inadequate" } ], "clip_score": 23.609375, "loss": 3.518012285232544 }, { "id": "identity_260280", "image": "TextVQA/images/e5f246059e8f7c11.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the item and what does the top of the item say?" }, { "from": "gpt", "value": "The answer is The item is made of wood and says \"FERNSEH GMBH" } ], "clip_score": 21.78125, "loss": 2.253636598587036 }, { "id": "identity_25787", "image": "Geometry3K/images/1845.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find x. A = 92 in^2.\nChoices:\n(A) 4.2\n(B) 6.3\n(C) 8.4\n(D) 10.5" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.140625, "loss": 2.6339218616485596 }, { "id": "identity_127183", "image": "A-OKVQA/images/457725.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the officer wearing a helmet?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 26.953125, "loss": 3.4988982677459717 }, { "id": "identity_168638", "image": "IconQA/images/20353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the angle between the hour hand and the minute hand?" }, { "from": "gpt", "value": "The answer is 217.5 degrees" } ], "clip_score": 31.90625, "loss": 2.2095956802368164 }, { "id": "identity_79313", "image": "GeoQA+/images/3135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AD is the median of triangle ABC, E and F are the midpoints of AD and BE respectively, and the area of triangle BFD is 6, what is the length of AC if BC is 12 units long?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 34.03125, "loss": 2.349745273590088 }, { "id": "identity_76204", "image": "TQA/images/question_images/cycle_water_1501.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What process is indicated by the red arrow coming from the trees?" }, { "from": "gpt", "value": "The answer is Transpiration" } ], "clip_score": 28.359375, "loss": 2.3185017108917236 }, { "id": "identity_99330", "image": "TabMWP/images/21944.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of menu items per restaurant?" }, { "from": "gpt", "value": "The answer is No mode" } ], "clip_score": 21.5, "loss": 3.637619972229004 }, { "id": "identity_245547", "image": "TQA/images/question_images/parts_neuron_7220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the pathway of an electrical impulse starting from the dendrites and ending at the axon terminals, including all major structures it passes through" }, { "from": "gpt", "value": "The answer is Dendrites, Cell Body, Axon Hillock, Axon, Myelin Sheath, Nodes of Ranvier, Schwann Cells, Axon Terminals" } ], "clip_score": 30.140625, "loss": 1.0715386867523193 }, { "id": "identity_95942", "image": "TabMWP/images/23864.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year did Jada take the most vacation days?" }, { "from": "gpt", "value": "The answer is 2011" } ], "clip_score": 42.15625, "loss": 1.5424914360046387 }, { "id": "identity_74354", "image": "ScienceQA/images/15217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can the first object be used to measure the circumference of the second object?" }, { "from": "gpt", "value": "The answer is Yes, but not accurately" } ], "clip_score": 28.75, "loss": 2.911508321762085 }, { "id": "identity_3242", "image": "A-OKVQA/images/136644.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is inside the white cups of the people?\nChoices:\n(A) beer\n(B) wine\n(C) tea\n(D) juice" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 13.1875, "loss": 3.735116958618164 }, { "id": "identity_35602", "image": "IconQA/images/104541.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of stars even or odd?\nChoices:\n(A) odd\n(B) even" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.609375, "loss": 4.173402786254883 }, { "id": "identity_326308", "image": "DocVQA/images/hjkm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of the check?" }, { "from": "gpt", "value": "The answer is 96889" } ], "clip_score": 16.09375, "loss": 3.0918049812316895 }, { "id": "identity_140747", "image": "PlotQA/images/45336.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of years represented in the graph?" }, { "from": "gpt", "value": "The answer is 1960 to 1962" } ], "clip_score": 26.09375, "loss": 1.9711889028549194 }, { "id": "identity_231529", "image": "DocVQA/images/fpbx0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money payable over the duration of the grant?" }, { "from": "gpt", "value": "The answer is $5,000.00" } ], "clip_score": 21.5, "loss": 1.5471382141113281 }, { "id": "identity_190467", "image": "TextVQA/images/315d2d6c1d1c5c3d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phrase does the character exclaim after stepping on something in the bottom left panel?" }, { "from": "gpt", "value": "The answer is Splat!" } ], "clip_score": 21.828125, "loss": 2.2132108211517334 }, { "id": "identity_16180", "image": "IconQA/images/88603.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What has been done to this letter?\nChoices:\n(A) slide\n(B) flip\n(C) turn" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.96875, "loss": 4.483338832855225 }, { "id": "identity_179086", "image": "IconQA/images/76427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many units up has the letter 'L' been moved?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 27.078125, "loss": 2.100905179977417 }, { "id": "identity_177134", "image": "TabMWP/images/36774.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What was the total number of entries for all the years combined?" }, { "from": "gpt", "value": "The answer is 223" } ], "clip_score": 27.390625, "loss": 2.196843385696411 }, { "id": "identity_60252", "image": "DocVQA/images/llml0226_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the letter?" }, { "from": "gpt", "value": "The answer is July 11, 1945" } ], "clip_score": 22.625, "loss": 1.8706445693969727 }, { "id": "identity_216286", "image": "A-OKVQA/images/176621.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many umbrellas are part of the show's entrance decoration?" }, { "from": "gpt", "value": "The answer is Six" } ], "clip_score": 28.703125, "loss": 4.0793867111206055 }, { "id": "identity_260927", "image": "IconQA/images/45264.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Pam got on the bus 30 minutes later than shown on her clock and Tom got on the bus 15 minutes earlier than shown on his clock, who got on the bus first?" }, { "from": "gpt", "value": "The answer is Pam" } ], "clip_score": 22.703125, "loss": 2.851879596710205 }, { "id": "identity_106892", "image": "PMC-VQA/images/PMC6137085_F1_367491.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is this image showing a healthy bone or signs of a bone disease?" }, { "from": "gpt", "value": "The answer is Healthy bone" } ], "clip_score": 28.265625, "loss": 2.777323007583618 }, { "id": "identity_146919", "image": "DocVQA/images/lglg0227_41.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two elements measured in the graphs?" }, { "from": "gpt", "value": "The answer is Sodium and Potassium" } ], "clip_score": 18.640625, "loss": 2.6677887439727783 }, { "id": "identity_241418", "image": "FigureQA/images/30403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Rank the colors from the largest to the smallest section in the pie chart" }, { "from": "gpt", "value": "The answer is Khaki, Peru, Cyan, Dark Orange" } ], "clip_score": 39.875, "loss": 1.5990928411483765 }, { "id": "identity_182112", "image": "A-OKVQA/images/144018.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 14.9921875, "loss": 2.5337908267974854 }, { "id": "identity_41404", "image": "FigureQA/images/9237.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of Turquoise in the pie chart?" }, { "from": "gpt", "value": "The answer is Turquoise is slightly more than 50%" } ], "clip_score": 33.40625, "loss": 1.911454439163208 }, { "id": "identity_57332", "image": "FigureQA/images/29892.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the title of the pie chart provided?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 33.34375, "loss": 3.4842891693115234 }, { "id": "identity_277482", "image": "A-OKVQA/images/555738.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: For what purpose is the long pipe in the ground most likely intended for? \n(A) hiking\n(B) irrigation\n(C) decoration\n(D) sports" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 10.4296875, "loss": 4.662630081176758 }, { "id": "identity_269248", "image": "DocVQA/images/hqmg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is responsible for sending a photocopy of the receiving slip?" }, { "from": "gpt", "value": "The answer is mr. cullen" } ], "clip_score": 22.234375, "loss": 2.7684173583984375 }, { "id": "identity_219652", "image": "PMC-VQA/images/PMC7073646_jcm-09-00303-f002_134954.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to create this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 30.84375, "loss": 2.952016830444336 }, { "id": "identity_301489", "image": "UniGeo/images/2407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that △ABC is an inscribed triangle in circle O, with AB=30, AC=24, and the height from BC to A is 20, what is the radius of circle O?\n\nChoices:\n(A) 16\n(B) 18\n(C) 20\n(D) 24" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.15625, "loss": 2.4371533393859863 }, { "id": "identity_22268", "image": "A-OKVQA/images/420496.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which photographic technique was used to capture the flow of traffic?\nChoices:\n(A) panoramic\n(B) vignetting\n(C) bokeh\n(D) time-lapse" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.25, "loss": 3.556180238723755 }, { "id": "identity_277957", "image": "PMC-VQA/images/PMC7203408_Fig1_164568.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the tumor located among the following choices: (A) Upper thoracic esophagus, (B) Middle thoracic esophagus, (C) Lower thoracic esophagus, or (D) Abdominal esophagus?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.140625, "loss": 3.7535452842712402 }, { "id": "identity_47893", "image": "MapQA/images/map_11636.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any state with data not available (N/A)?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 23.28125, "loss": 2.915154218673706 }, { "id": "identity_141022", "image": "ChartQA/images/two_col_104488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the percentage of people who are neither satisfied nor dissatisfied greater than the percentage of people who are very dissatisfied?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 31.3125, "loss": 3.3694374561309814 }, { "id": "identity_112182", "image": "TextVQA/images/00211783004d27f6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many containers with yellow caps can be seen in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 11.828125, "loss": 4.493313789367676 }, { "id": "identity_225781", "image": "IconQA/images/69083.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If each paper clip is 2 cm long, what is the total length of the line in centimeters?" }, { "from": "gpt", "value": "The answer is 6 cm" } ], "clip_score": 29.671875, "loss": 1.9759225845336914 }, { "id": "identity_184326", "image": "PMC-VQA/images/PMC6764633_acn350836-fig-0001_42421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the imaging technique used for this image?" }, { "from": "gpt", "value": "The answer is MRI (Magnetic Resonance Imaging)" } ], "clip_score": 34.90625, "loss": 1.042346715927124 }, { "id": "identity_102767", "image": "TabMWP/images/11372.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in price between a four-inch nail and a brass clip?" }, { "from": "gpt", "value": "The answer is $0.05" } ], "clip_score": 34.84375, "loss": 1.849189281463623 }, { "id": "identity_251491", "image": "GeoQA+/images/5227.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知∠BOF=120°, ∠A + ∠C + ∠E = 180°, and ∠B + ∠D + ∠F = 180°, 求∠A + ∠B + ∠C + ∠D + ∠E + ∠F的值" }, { "from": "gpt", "value": "The answer is 360°" } ], "clip_score": 29.671875, "loss": 1.3977001905441284 }, { "id": "identity_288790", "image": "PMC-VQA/images/PMC6466781_Fig3_465993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which eye did the cataract deteriorate?\nChoices:\n(A) The right eye\n(B) The left eye\n(C) Both eyes\n(D) None of the above" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 18.15625, "loss": 3.579402208328247 }, { "id": "identity_69112", "image": "ScienceQA/images/4673.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What habitat is the bird in the image most likely to be found in?" }, { "from": "gpt", "value": "The answer is Wetlands, lakes, and rivers" } ], "clip_score": 24.203125, "loss": 1.9719505310058594 }, { "id": "identity_16755", "image": "VQA2.0/images/338258.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: From those whose whole bodies are visible, are they all the same gender?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.3125, "loss": 4.1012797355651855 }, { "id": "identity_283259", "image": "VQA2.0/images/215819.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Do clouds exist in the sky?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 13.5625, "loss": 3.200669765472412 }, { "id": "identity_334543", "image": "PMC-VQA/images/PMC7016374_fig0003_116039.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of MRI image is shown here?\nChoices:\n(A) T2-weighted\n(B) FLAIR\n(C) DWI\n(D) T1-weighted" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.328125, "loss": 3.6986942291259766 }, { "id": "identity_183807", "image": "A-OKVQA/images/518816.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the image suggest it is?" }, { "from": "gpt", "value": "The answer is Either early morning or late afternoon" } ], "clip_score": 19.125, "loss": 3.062469005584717 }, { "id": "identity_152298", "image": "ScienceQA/images/16495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly west of Virginia?" }, { "from": "gpt", "value": "The answer is West Virginia" } ], "clip_score": 26.40625, "loss": 2.2792558670043945 }, { "id": "identity_95435", "image": "IconQA/images/52332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the blue section to the black section?" }, { "from": "gpt", "value": "The answer is 1:2" } ], "clip_score": 29.0625, "loss": 1.4779002666473389 }, { "id": "identity_165309", "image": "UniGeo/images/1979.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is the diameter of the circle with center O and ∠BAD = 20°, what is the measure of ∠BOD?" }, { "from": "gpt", "value": "The answer is 160°" } ], "clip_score": 36.21875, "loss": 1.5057262182235718 }, { "id": "identity_143564", "image": "DVQA/images/bar_train_00082061.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the algorithm night have a higher accuracy in the dataset part than in the dataset stake?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 35.5, "loss": 2.9005725383758545 }, { "id": "identity_43848", "image": "FigureQA/images/75575.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Violet Red across the x-axis?" }, { "from": "gpt", "value": "The answer is 55" } ], "clip_score": 25.34375, "loss": 2.5131094455718994 }, { "id": "identity_48562", "image": "PlotQA/images/19442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average revenue generated from taxes by Japan across the given years?" }, { "from": "gpt", "value": "The answer is (4+6+8+6)/4 = 6%" } ], "clip_score": 28.328125, "loss": 2.3449783325195312 }, { "id": "identity_214511", "image": "VizWiz/images/VizWiz_train_00015061.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many candies can be seen through the jar?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 26.5, "loss": 4.6774821281433105 }, { "id": "identity_134870", "image": "ChartQA/images/10303.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the median percentage of people who view U.S. economic aid as mostly negative?" }, { "from": "gpt", "value": "The answer is 15%" } ], "clip_score": 25.78125, "loss": 2.2692065238952637 }, { "id": "identity_219037", "image": "TextVQA/images/442d372e1f977999.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What accessory is the person on the left holding in their left hand?" }, { "from": "gpt", "value": "The answer is A large pocket watch" } ], "clip_score": 26.90625, "loss": 3.604339838027954 }, { "id": "identity_229889", "image": "A-OKVQA/images/358462.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the steps involved in preparing the horse to pull the wagon, and why is each step necessary?" }, { "from": "gpt", "value": "The answer is To distribute load, connect, secure, check health, and guide" } ], "clip_score": 27.890625, "loss": 4.710339069366455 }, { "id": "identity_200481", "image": "PMC-VQA/images/PMC3583842_f4-ijo-41-04-1260_188597.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What magnification is likely used to capture this image?" }, { "from": "gpt", "value": "The answer is 20x to 40x magnification" } ], "clip_score": 23.28125, "loss": 1.6147868633270264 }, { "id": "identity_216704", "image": "A-OKVQA/images/397186.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many dogs can be seen participating in the event?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 20.046875, "loss": 2.0569982528686523 }, { "id": "identity_81951", "image": "UniGeo/images/2115.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the image, what is the ratio of the area of the shaded region to the area of the entire hexagon formed after triangle ABD is moved to the right to form triangle A'B'D'?" }, { "from": "gpt", "value": "The answer is 1/3" } ], "clip_score": 24.71875, "loss": 1.423060417175293 }, { "id": "identity_329678", "image": "PMC-VQA/images/PMC6462305_f0004_464919.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where in the brain are these resolving hypointense areas located?\nChoices:\n(A) Frontal region\n(B) Temporal region\n(C) Occipital region\n(D) Parietal region" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.640625, "loss": 4.328497886657715 }, { "id": "identity_297176", "image": "A-OKVQA/images/418944.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: At what temperature is the item being held by the girl when it is in full use? \nOptions:\n(A) room temperature\n(B) 20 degrees\n(C) cold\n(D) hot" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 14.546875, "loss": 3.744459629058838 }, { "id": "identity_223819", "image": "DVQA/images/bar_train_00134045.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold for each item across all stores, and which item has the highest total sales?" }, { "from": "gpt", "value": "The answer is lead" } ], "clip_score": 30.1875, "loss": 4.602908134460449 }, { "id": "identity_58302", "image": "FigureQA/images/2490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have equal sections?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.375, "loss": 3.6632049083709717 }, { "id": "identity_279262", "image": "TextVQA/images/57e60e7639b3d40d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the FM station number for ESPN at this arena?" }, { "from": "gpt", "value": "The answer is 106.5" } ], "clip_score": 18.421875, "loss": 1.5004055500030518 }, { "id": "identity_328443", "image": "PMC-VQA/images/PMC2478714_pone-0002853-g002_25662.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the arrow indicate in this image?\nChoices:\n(A) blood vessels\n(B) heart structures\n(C) tail structures\n(D) brain structures" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.71875, "loss": 4.731350421905518 }, { "id": "identity_136536", "image": "DocVQA/images/lmmf0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the state and zip code for the hotel?" }, { "from": "gpt", "value": "The answer is Florida 33316" } ], "clip_score": 28.21875, "loss": 1.6985324621200562 }, { "id": "identity_118147", "image": "VQA-AS/images/1449.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object the dog is running towards?" }, { "from": "gpt", "value": "The answer is Pink" } ], "clip_score": 27.390625, "loss": 3.7050819396972656 }, { "id": "identity_323830", "image": "GeoQA+/images/1965.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,菱形的面积为24,对角线交于点O,E是BC边的中点,EF垂直于BD于点F,EG垂直于AC于点G,则四边形EFOG的面积为()\n选择:\n(A) 3\n(B) 5\n(C) 6\n(D) 8" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 25.4375, "loss": 2.5131382942199707 }, { "id": "identity_30203", "image": "CLEVR-Math/images/CLEVR_train_046310.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all objects. Subtract all red matte cylinders. How many objects are left?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 27.0, "loss": 2.242802381515503 }, { "id": "identity_31565", "image": "A-OKVQA/images/4551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of dinosaur does this represent?\nChoices:\n(A) triceratops\n(B) t-rex\n(C) brontosaur\n(D) paradactyl" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 10.703125, "loss": 4.096927642822266 }, { "id": "identity_88087", "image": "GeoQA+/images/5294.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the hypotenuse of the triangle with the 60° angle is 10 units, what is the length of the side opposite the 60° angle?" }, { "from": "gpt", "value": "The answer is 5√3 units" } ], "clip_score": 31.46875, "loss": 1.9204001426696777 }, { "id": "identity_46049", "image": "FigureQA/images/43390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate percentage of the Aqua section in the pie chart?" }, { "from": "gpt", "value": "The answer is Approximately 25%" } ], "clip_score": 34.5625, "loss": 1.8743709325790405 }, { "id": "identity_52152", "image": "FigureQA/images/70043.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value reached by the Medium Seafoam line?" }, { "from": "gpt", "value": "The answer is 47" } ], "clip_score": 24.40625, "loss": 2.841379165649414 }, { "id": "identity_101513", "image": "CLEVR-Math/images/CLEVR_train_025491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are metallic?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.015625, "loss": 2.40351939201355 }, { "id": "identity_103826", "image": "A-OKVQA/images/255026.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a visible chimney on the roof of the house?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 17.34375, "loss": 3.3049569129943848 }, { "id": "identity_335527", "image": "PMC-VQA/images/PMC7349964_vaccines-08-00240-f005_210150.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of staining was used in this examination?\nChoices:\n(A) PAS staining\n(B) Gram staining\n(C) H&E staining\n(D) Immunohistochemistry" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.015625, "loss": 3.6761703491210938 }, { "id": "identity_80908", "image": "Geometry3K/images/623.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of x in the isosceles triangle ABC?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 36.96875, "loss": 2.8901455402374268 }, { "id": "identity_309267", "image": "CLEVR-Math/images/CLEVR_train_006576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Remove all green cylinders. Remove all gray cubes. How many items remain?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.390625, "loss": 2.1853883266448975 }, { "id": "identity_131904", "image": "TextVQA/images/0d97042c0644f681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the juice in the glass?" }, { "from": "gpt", "value": "The answer is Orange" } ], "clip_score": 18.53125, "loss": 3.0141043663024902 }, { "id": "identity_239656", "image": "DVQA/images/bar_train_00034949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total sum of the values represented by the mediumturquoise color across all categories?" }, { "from": "gpt", "value": "The answer is 28" } ], "clip_score": 25.125, "loss": 2.15748929977417 }, { "id": "identity_263115", "image": "FigureQA/images/90355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is darker, Gray or Dark Gold?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 17.875, "loss": 4.558090686798096 }, { "id": "identity_258028", "image": "VizWiz/images/VizWiz_train_00012163.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the different types of vegetation visible in the lawn, and how many distinct types can you identify?" }, { "from": "gpt", "value": "The answer is four distinct types" } ], "clip_score": 22.875, "loss": 4.563845634460449 }, { "id": "identity_38848", "image": "FigureQA/images/75676.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Periwinkle at x-axis label 80?" }, { "from": "gpt", "value": "The answer is approximately 56" } ], "clip_score": 23.578125, "loss": 3.20146107673645 }, { "id": "identity_332825", "image": "ScienceQA/images/7116.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What characteristic fits this item?\nChoices:\n(A) bouncy\n(B) blue" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.0625, "loss": 4.218008995056152 }, { "id": "identity_289114", "image": "PMC-VQA/images/PMC3546017_F4_179296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which diagnostic tool was utilized to capture an image of the tumor? \nOptions:\n(A) MRI\n(B) X-Ray\n(C) CT scan\n(D) Ultrasound" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 33.8125, "loss": 4.058653831481934 }, { "id": "identity_120349", "image": "A-OKVQA/images/402715.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Sunny" } ], "clip_score": 17.296875, "loss": 2.827427864074707 }, { "id": "identity_322564", "image": "TextVQA/images/03170e6f23d22c9f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is this?" }, { "from": "gpt", "value": "The answer is rolex" } ], "clip_score": 26.546875, "loss": 3.1462724208831787 }, { "id": "identity_303838", "image": "A-OKVQA/images/535519.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of color scheme is used in the image?\nChoices:\n(A) cold filter\n(B) monochromatic\n(C) full color\n(D) sepia" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 14.9453125, "loss": 4.584738254547119 }, { "id": "identity_259151", "image": "A-OKVQA/images/448046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Based on the attire of the two people riding the tandem bike, what event likely took place before they got married, and what is the significance of their attire in relation to the event?" }, { "from": "gpt", "value": "The answer is They participated in a wedding ceremony" } ], "clip_score": 14.125, "loss": 3.8258917331695557 }, { "id": "identity_236620", "image": "DVQA/images/bar_train_00171689.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the object \"noon\" across all categories, and how does this compare to the total number of people who prefer the object \"motor\" across all categories?" }, { "from": "gpt", "value": "The answer is Motor\" is preferred by more people than \"noon" } ], "clip_score": 28.3125, "loss": 2.769331455230713 }, { "id": "identity_311657", "image": "TabMWP/images/17789.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: During health class, the students reported their fast food consumption last month. How many students ate fast food 3 or more times?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 31.578125, "loss": 2.4245145320892334 }, { "id": "identity_235883", "image": "TQA/images/question_images/human_system_respiratory_1403.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many primary bronchi are there in the human respiratory system, and how do they relate to the number of lungs?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 32.9375, "loss": 3.16796875 }, { "id": "identity_274201", "image": "GeoQA+/images/2171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, triangle ABC is an inscribed triangle in circle O. If angle C is 60°, what is the measure of angle AOB?\nChoices:\n(A) 30°\n(B) 60°\n(C) 90°\n(D) 120°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 36.375, "loss": 2.7371251583099365 }, { "id": "identity_230042", "image": "DVQA/images/bar_train_00160851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer either the cross or the temple, and what percentage of the total number of people surveyed does this represent?" }, { "from": "gpt", "value": "The answer is 13 people, approximately 68.42%" } ], "clip_score": 27.34375, "loss": 2.57077693939209 }, { "id": "identity_18843", "image": "UniGeo/images/690.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC与△ADE是位似图形,且相似比为2:3,若△ABC的面积为18,则△ADE的面积为()\nChoices:\n(A) 6\n(B) 8\n(C) 9\n(D) 12" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.21875, "loss": 2.3562850952148438 }, { "id": "identity_326085", "image": "A-OKVQA/images/195079.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of window is the person taking this photo looking through?\nChoices:\n(A) side\n(B) windshield\n(C) rear\n(D) front" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.078125, "loss": 4.287609577178955 }, { "id": "identity_101035", "image": "IconQA/images/14741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the nail including the head and the point to the nearest inch?" }, { "from": "gpt", "value": "The answer is 3 inches" } ], "clip_score": 34.40625, "loss": 2.1098532676696777 }, { "id": "identity_261896", "image": "AI2D/images/3827.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is the location where polypeptides encoded by messenger RNA are assembled?\nChoices:\n(A) Nucleus.\n(B) Ribosome.\n(C) Chloroplast.\n(D) Mitochondria." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.609375, "loss": 3.8675026893615723 }, { "id": "identity_145078", "image": "MapQA/images/map_9860.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest value in the Midwest region?" }, { "from": "gpt", "value": "The answer is State Name with the lightest color in the Midwest" } ], "clip_score": 30.0625, "loss": 3.124450445175171 }, { "id": "identity_203177", "image": "VQA2.0/images/37863.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are the umbrellas in the photo?" }, { "from": "gpt", "value": "The answer is Beige" } ], "clip_score": 25.4375, "loss": 3.387528419494629 }, { "id": "identity_85548", "image": "GeoQA+/images/393.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given the lines AB and CD intersect at point O and OE is perpendicular to AB at O, if the measure of angle COE is 55 degrees, what is the measure of angle EOD?" }, { "from": "gpt", "value": "The answer is 35 degrees" } ], "clip_score": 34.65625, "loss": 1.6572117805480957 }, { "id": "identity_7633", "image": "VQA-RAD/images/synpic24878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is anything irregular in this image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.765625, "loss": 2.3426718711853027 }, { "id": "identity_76601", "image": "ScienceQA/images/13076.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the ecosystem in the image differ from a typical ocean surface ecosystem?" }, { "from": "gpt", "value": "The answer is Relies on chemosynthesis, not photosynthesis" } ], "clip_score": 23.78125, "loss": 2.7574291229248047 }, { "id": "identity_88817", "image": "UniGeo/images/171.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the grassland covered by the asphalt path?" }, { "from": "gpt", "value": "The answer is 8 + 4*pi m^2" } ], "clip_score": 24.71875, "loss": 3.4880404472351074 }, { "id": "identity_112353", "image": "VQA2.0/images/329245.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bowl containing the soup?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 25.234375, "loss": 3.0085484981536865 }, { "id": "identity_257579", "image": "PMC-VQA/images/PMC7096069_FIG6_141121.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key anatomical structures visible in the imaging modality used for the follow-up at three months, and what is the significance of these structures in the context of the patient's condition?" }, { "from": "gpt", "value": "The answer is Vertebrae, intervertebral discs, spinal cord; assess damage or degeneration" } ], "clip_score": 30.15625, "loss": 2.718651056289673 }, { "id": "identity_267129", "image": "DocVQA/images/xtkg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of the contract?" }, { "from": "gpt", "value": "The answer is DA-49-007-MD 544" } ], "clip_score": 17.546875, "loss": 3.117645025253296 }, { "id": "identity_180810", "image": "CLEVR-Math/images/CLEVR_train_035616.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are there in total?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 19.078125, "loss": 1.9376293420791626 }, { "id": "identity_16775", "image": "TextVQA/images/c8b854a3b86c86c1.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the screen telling us to eat?" }, { "from": "gpt", "value": "The answer is white" } ], "clip_score": 11.046875, "loss": 4.863823413848877 }, { "id": "identity_233371", "image": "AI2D/images/1897.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the primary producer in the food web and explain the flow of energy from the primary producer to the top predator, detailing each step and organism involved" }, { "from": "gpt", "value": "The answer is Phytoplankton → Zooplankton → Small Fish → Mackerel/Snapper Fish → Large Shark/Fisherman" } ], "clip_score": 33.25, "loss": 1.3018124103546143 }, { "id": "identity_303299", "image": "VQA2.0/images/103393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color are these?" }, { "from": "gpt", "value": "The answer is orange" } ], "clip_score": 12.8359375, "loss": 3.5172359943389893 }, { "id": "identity_272054", "image": "Super-CLEVR/images/superCLEVR_new_000311.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can any green metal choppers be seen?" }, { "from": "gpt", "value": "The answer is False" } ], "clip_score": 18.703125, "loss": 4.848790645599365 }, { "id": "identity_319769", "image": "PlotQA/images/20820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average CO2 emissions from some fuel consumption per year?" }, { "from": "gpt", "value": "The answer is 6445.363666666667" } ], "clip_score": 30.84375, "loss": 2.224485158920288 }, { "id": "identity_115328", "image": "TextVQA/images/6ed58c763f550262.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What genre of stories does this publication feature?" }, { "from": "gpt", "value": "The answer is Exciting Adventures - Fiction" } ], "clip_score": 18.75, "loss": 2.528360605239868 }, { "id": "identity_230615", "image": "VQA-RAD/images/synpic51774.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any signs of cardiomegaly (enlarged heart) in this X-ray?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.875, "loss": 2.8378746509552 }, { "id": "identity_225226", "image": "UniGeo/images/579.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,用铁皮做一个圆锥形的烟囱帽,它的底面直径是80cm,高是30cm,不计加工余料,则需用铁皮的面积最少是()。请计算该圆锥形烟囱帽的侧面积和底面积之和。" }, { "from": "gpt", "value": "The answer is 3600π cm²" } ], "clip_score": 21.40625, "loss": 2.1675148010253906 }, { "id": "identity_100208", "image": "TabMWP/images/14628.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cities have more than 35 billboards?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 17.859375, "loss": 2.229564905166626 }, { "id": "identity_20768", "image": "DVQA/images/bar_train_00098070.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest accuracy reported in the whole chart?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 23.984375, "loss": 3.5821781158447266 }, { "id": "identity_2681", "image": "Geometry3K/images/20.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find y so that the quadrilateral is a parallelogram.\nChoices:\n(A) 16\n(B) 32\n(C) 74\n(D) 80" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.625, "loss": 2.658226728439331 }, { "id": "identity_286025", "image": "ScienceQA/images/8698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which continent is being indicated by the highlight?\nOptions:\n(A) Africa\n(B) Europe\n(C) Asia\n(D) Antarctica" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 24.890625, "loss": 4.377270698547363 }, { "id": "identity_149735", "image": "DocVQA/images/rgmg0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percent authorized per account for JOHN M BREWER JR?" }, { "from": "gpt", "value": "The answer is 100.00" } ], "clip_score": 23.296875, "loss": 1.2129924297332764 }, { "id": "identity_167136", "image": "IconQA/images/68509.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the shape is folded along the dashed line, will both halves match perfectly?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 25.875, "loss": 3.089062213897705 }, { "id": "identity_20178", "image": "TabMWP/images/31936.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much money does Darrell need to buy a guitar, a piano, and a trombone?" }, { "from": "gpt", "value": "The answer is 6,783" } ], "clip_score": 31.015625, "loss": 2.7984957695007324 }, { "id": "identity_186742", "image": "PMC-VQA/images/PMC5694160_F2_241941.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the significance of the letter 'D' in the corner of the image?" }, { "from": "gpt", "value": "The answer is It indicates the image's order in a series" } ], "clip_score": 22.890625, "loss": 3.310762643814087 }, { "id": "identity_12831", "image": "GeoQA+/images/5230.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线a与直线b被直线c所截,b⊥c,垂足为点A,∠1=60°,若使直线b与直线a平行,则可将直线b绕着点A顺时针旋转()\nChoices:\n(A) 60°\n(B) 40°\n(C) 30°\n(D) 20°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.609375, "loss": 2.682957887649536 }, { "id": "identity_125834", "image": "VQA-AS/images/18217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cloud is depicted in the sky?" }, { "from": "gpt", "value": "The answer is Cumulus" } ], "clip_score": 16.140625, "loss": 2.760502576828003 }, { "id": "identity_162144", "image": "Geometry3K/images/1806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle ABC in the given trapezoid ABCD?" }, { "from": "gpt", "value": "The answer is 120" } ], "clip_score": 33.5625, "loss": 2.3869876861572266 }, { "id": "identity_39261", "image": "FigureQA/images/71512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value for Magenta on the y-axis label?" }, { "from": "gpt", "value": "The answer is 96" } ], "clip_score": 29.90625, "loss": 2.9409286975860596 }, { "id": "identity_87851", "image": "GeoQA+/images/1911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠ADC = 35°, what is the measure of ∠ACB?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 31.734375, "loss": 1.969582200050354 }, { "id": "identity_56189", "image": "DocVQA/images/mldg0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the last action mentioned under the development of food standards?" }, { "from": "gpt", "value": "The answer is Insure wholesomeness" } ], "clip_score": 22.390625, "loss": 3.58129620552063 }, { "id": "identity_272014", "image": "ScienceQA/images/18698.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Virginia?\nChoices:\n(A) Norfolk\n(B) Des Moines\n(C) Richmond\n(D) Charleston" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.1875, "loss": 4.013455867767334 }, { "id": "identity_21511", "image": "A-OKVQA/images/314904.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Who originally created this sport?\nChoices:\n(A) shaq o'neal\n(B) james naismith\n(C) michael jordan\n(D) wayne gretzky" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 23.765625, "loss": 4.458064079284668 }, { "id": "identity_183845", "image": "A-OKVQA/images/542681.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What other brand besides GARNIER is visible in the image?" }, { "from": "gpt", "value": "The answer is KIA MOTORS" } ], "clip_score": 19.328125, "loss": 1.7077410221099854 }, { "id": "identity_129485", "image": "VQA-RAD/images/synpic15006.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any evidence of a pneumothorax in this image?" }, { "from": "gpt", "value": "The answer is No evidence of pneumothorax" } ], "clip_score": 33.34375, "loss": 2.411163568496704 }, { "id": "identity_131166", "image": "TextVQA/images/e294293fec6c5350.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bottles are visible in the background?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 18.25, "loss": 4.316595077514648 }, { "id": "identity_277248", "image": "ScienceQA/images/17801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Indiana?\nChoices:\n(A) Springfield\n(B) Saint Paul\n(C) Indianapolis\n(D) Fort Wayne" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.46875, "loss": 4.155711650848389 }, { "id": "identity_333418", "image": "DVQA/images/bar_train_00166800.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item is chosen by the fewest people in total across all categories?" }, { "from": "gpt", "value": "The answer is error" } ], "clip_score": 28.15625, "loss": 3.197751998901367 }, { "id": "identity_38035", "image": "PMC-VQA/images/PMC3773898_fig4_231613.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What part of the body was operated in Case 2? \nChoices:\n(A) Lumbar spine \n(B) Thoracic spine \n(C) Cervical spine \n(D) Sacral spine " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.671875, "loss": 4.2790327072143555 }, { "id": "identity_89195", "image": "GEOS/images/025.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the arc designated by x if the radius of the circle is 10 units?" }, { "from": "gpt", "value": "The answer is (5/18) * π * 10, or approximately 8.73 units (rounded to two decimal places)" } ], "clip_score": 32.625, "loss": 1.6682617664337158 }, { "id": "identity_43523", "image": "DVQA/images/bar_train_00077048.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the values for \"guest\" across all three categories?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 24.0625, "loss": 2.624039888381958 }, { "id": "identity_20590", "image": "Super-CLEVR/images/superCLEVR_new_000248.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the purple rubber object?" }, { "from": "gpt", "value": "The answer is school bus" } ], "clip_score": 29.609375, "loss": 3.723851442337036 }, { "id": "identity_47813", "image": "FigureQA/images/98811.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in value between Periwinkle and Light Slate at x-axis value 40?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 29.640625, "loss": 2.2972006797790527 }, { "id": "identity_258167", "image": "DocVQA/images/msbw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of days between the date the 1st draft was sent to Daniele and the date Daniele's comments were sent to Gary?" }, { "from": "gpt", "value": "The answer is 19 days" } ], "clip_score": 23.859375, "loss": 2.0712084770202637 }, { "id": "identity_23943", "image": "VQA2.0/images/149921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are they doing?" }, { "from": "gpt", "value": "The answer is eating" } ], "clip_score": 18.046875, "loss": 2.309149980545044 }, { "id": "identity_207370", "image": "TextVQA/images/1d8a270fbeb23762.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number on the tax stamp?" }, { "from": "gpt", "value": "The answer is 067849" } ], "clip_score": 14.6015625, "loss": 2.663383722305298 }, { "id": "identity_333068", "image": "PMC-VQA/images/PMC4150633_f0005_316993.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Where is this located?\nChoices:\n(A) Right parietotemporal region\n(B) Bilateral hippocampi\n(C) Left parietotemporal region\n(D) Left hippocampi" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 22.75, "loss": 4.141928195953369 }, { "id": "identity_172633", "image": "TabMWP/images/26503.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many teams have a size that is a multiple of 10?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 31.3125, "loss": 2.726940870285034 }, { "id": "identity_338058", "image": "ChartQA/images/two_col_1592.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What were the earnings in this month?" }, { "from": "gpt", "value": "The answer is 11.29" } ], "clip_score": 23.171875, "loss": 1.950646996498108 }, { "id": "identity_235279", "image": "AI2D/images/4873.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the process of how magma reaches the surface in a composite volcano, starting from the magma chamber" }, { "from": "gpt", "value": "The answer is Magma chamber, conduit, branch pipe, sill, dike, vent, crater, lava flow, ash cloud" } ], "clip_score": 34.875, "loss": 1.8297686576843262 }, { "id": "identity_292093", "image": "DocVQA/images/qjvc0228_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which date is being referred to?" }, { "from": "gpt", "value": "The answer is MAY 17, 1966" } ], "clip_score": 17.6875, "loss": 2.0552427768707275 }, { "id": "identity_196532", "image": "TextVQA/images/1a565a40e6fa8300.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subtitle of the magazine?" }, { "from": "gpt", "value": "The answer is Adapting your strategy to today's fast-changing market" } ], "clip_score": 26.65625, "loss": 1.2049674987792969 }, { "id": "identity_12772", "image": "MapQA/images/map_9086.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does Iowa have the lowest value in the MidWest ?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.796875, "loss": 2.691913604736328 }, { "id": "identity_144665", "image": "DocVQA/images/qxmp0227_8.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What organization is E.J. Alvarez associated with?" }, { "from": "gpt", "value": "The answer is INCAP" } ], "clip_score": 13.390625, "loss": 3.502662420272827 }, { "id": "identity_33366", "image": "VQA-AS/images/15117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the rug?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 22.765625, "loss": 3.4168155193328857 }, { "id": "identity_153175", "image": "AI2D/images/4567.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in axial tilt between Earth and Mars?" }, { "from": "gpt", "value": "The answer is 1.93 degrees" } ], "clip_score": 30.5625, "loss": 2.0521950721740723 }, { "id": "identity_9899", "image": "AI2D/images/4638.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is one of the pests shown in the figure?\nChoices:\n(A) horn fly\n(B) beetle\n(C) ant\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.21875, "loss": 3.7790446281433105 }, { "id": "identity_247930", "image": "TextVQA/images/4c1585f044724f7e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books by Epictetus are shown in the image, and what are their titles?" }, { "from": "gpt", "value": "The answer is Two books, \"Manual for Living\" and \"Discourses" } ], "clip_score": 31.125, "loss": 2.4944047927856445 }, { "id": "identity_308234", "image": "Geometry3K/images/1176.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Given that \\( m \\angle 11 = 4x \\) and \\( m \\angle 12 = 2x - 6 \\), determine the measure of \\( \\angle 12 \\).\n\nChoices:\n(A) 50\n(B) 52\n(C) 56\n(D) 62" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.796875, "loss": 2.7803561687469482 }, { "id": "identity_43203", "image": "DVQA/images/bar_train_00064576.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are preferred by exactly 7 people?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.0, "loss": 2.349726676940918 }, { "id": "identity_126151", "image": "VQA2.0/images/256451.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the location in the image commonly associated with?" }, { "from": "gpt", "value": "The answer is Skiing" } ], "clip_score": 25.265625, "loss": 3.0961945056915283 }, { "id": "identity_43709", "image": "DVQA/images/bar_train_00171211.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the preference for 'crisis' in the 'farm' category greater than 50%?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 25.828125, "loss": 3.293572187423706 }, { "id": "identity_272774", "image": "TQA/images/question_images/cycle_nitrogen_6720.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following processes involves bringing nitrogen from plants and animals?\n(A) nitrogen-fixing by bacteria\n(B) lightning\n(C) denitrifying of nitrites by bacteria\n(D) decaying of organic matter" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.15625, "loss": 3.7956886291503906 }, { "id": "identity_172276", "image": "TabMWP/images/663.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mode of the number of masks made?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.75, "loss": 2.8082592487335205 }, { "id": "identity_66785", "image": "ScienceQA/images/653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary mineral composition of the rock shown in the image?" }, { "from": "gpt", "value": "The answer is Olivine" } ], "clip_score": 33.5625, "loss": 3.3943488597869873 }, { "id": "identity_199613", "image": "TextVQA/images/19742ee7e740ee20.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the pattern on the background where the clocks are mounted?" }, { "from": "gpt", "value": "The answer is Overlapping clock faces" } ], "clip_score": 23.703125, "loss": 3.2424426078796387 }, { "id": "identity_311123", "image": "UniGeo/images/341.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB,CD是⊙O的直径,DF,BE是弦,若⁀{DF}=⁀{BE},∠B=50°,则∠D的度数是多少?\n选项:\n(A) 25°\n(B) 40°\n(C) 50°\n(D) 60°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.265625, "loss": 2.421990156173706 }, { "id": "identity_178148", "image": "CLEVR-Math/images/CLEVR_train_065196.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more metallic objects or matte objects?" }, { "from": "gpt", "value": "The answer is Equal" } ], "clip_score": 33.6875, "loss": 3.634747266769409 }, { "id": "identity_235612", "image": "Super-CLEVR/images/superCLEVR_new_000887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many big shiny objects are there in total, and how many of them are of the same shape as the tiny green thing?" }, { "from": "gpt", "value": "The answer is 7 total, 1 same shape" } ], "clip_score": 15.2421875, "loss": 2.9529457092285156 }, { "id": "identity_18430", "image": "MapQA/images/map_10228.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the highest value in the Northeast ?" }, { "from": "gpt", "value": "The answer is 77.7%-97.6%" } ], "clip_score": 21.828125, "loss": 2.307313919067383 }, { "id": "identity_163020", "image": "GeoQA+/images/4672.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the circle O, what is the measure of angle ABC?" }, { "from": "gpt", "value": "The answer is 155 degrees" } ], "clip_score": 36.21875, "loss": 2.4771509170532227 }, { "id": "identity_106873", "image": "VizWiz/images/VizWiz_train_00017849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many braille cells are there on the device?" }, { "from": "gpt", "value": "The answer is 32" } ], "clip_score": 25.390625, "loss": 2.304069995880127 }, { "id": "identity_76489", "image": "ScienceQA/images/17213.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 32.375, "loss": 3.0273666381835938 }, { "id": "identity_93952", "image": "IconQA/images/17442.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one shape is divided into two equal parts, how many shapes will there be in total?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 29.90625, "loss": 2.1784238815307617 }, { "id": "identity_152260", "image": "ScienceQA/images/5366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the capital of the state that is located directly east of West Virginia?" }, { "from": "gpt", "value": "The answer is Richmond" } ], "clip_score": 23.578125, "loss": 2.9559566974639893 }, { "id": "identity_257576", "image": "ScienceQA/images/12261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the capital of Pennsylvania and then list the capitals of the three states that border Pennsylvania to the west, south, and east" }, { "from": "gpt", "value": "The answer is Harrisburg, Columbus, Annapolis, Trenton" } ], "clip_score": 30.9375, "loss": 2.186995267868042 }, { "id": "identity_333261", "image": "IconQA/images/106314.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many items are there?\nChoices:\n(A) 2\n(B) 3\n(C) 5\n(D) 1\n(E) 4" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 16.171875, "loss": 3.53009033203125 }, { "id": "identity_239771", "image": "DVQA/images/bar_train_00113922.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the three least preferred objects?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 25.609375, "loss": 2.092697858810425 }, { "id": "identity_161929", "image": "GeoQA+/images/591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given circle with center O, if angle ABC is 30 degrees, what is the measure of angle OAC?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 33.59375, "loss": 1.807530164718628 }, { "id": "identity_217387", "image": "TextVQA/images/fc207d586d28ab83.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the author of these books?" }, { "from": "gpt", "value": "The answer is Robin Hobb" } ], "clip_score": 32.21875, "loss": 2.886004686355591 }, { "id": "identity_263403", "image": "GeoQA+/images/2322.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, the radius of circle O is 2, point A is a point on circle O, and radius OD is perpendicular to chord BC at D. If ∠BAC = 60°, what is the length of BC?\nChoices:\n(A) 2√{3}\n(B) √{3}\n(C) 1\n(D) \\frac{√{3}}{2}" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 33.71875, "loss": 2.2973408699035645 }, { "id": "identity_174298", "image": "TabMWP/images/12.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many buildings have more floors than the tallest building with 20 floors but fewer floors than the shortest building with 70 floors?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 23.71875, "loss": 2.3656179904937744 }, { "id": "identity_55503", "image": "FigureQA/images/86255.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Peru at x-axis label 40?" }, { "from": "gpt", "value": "The answer is The value of Peru at x-axis label 40 is 82" } ], "clip_score": 25.03125, "loss": 1.6597766876220703 }, { "id": "identity_123027", "image": "TextVQA/images/0c4fc87e7a636fe8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand of shoes is the player wearing?" }, { "from": "gpt", "value": "The answer is Adidas" } ], "clip_score": 12.984375, "loss": 2.8172032833099365 }, { "id": "identity_180746", "image": "IconQA/images/41404.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many paper clips are there in total?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 30.78125, "loss": 2.1687991619110107 }, { "id": "identity_161401", "image": "GeoQA+/images/5945.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle MEF in the given diagram?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 33.96875, "loss": 2.1729624271392822 }, { "id": "identity_163161", "image": "GeoQA+/images/3526.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If point P divides CD in the ratio 1:2, what is the length of CP given that the side of the square is 8 units?" }, { "from": "gpt", "value": "The answer is 8/3 units" } ], "clip_score": 31.46875, "loss": 1.730746865272522 }, { "id": "identity_262541", "image": "TabMWP/images/8291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of money that Sophie requires to purchase a pair of brown pants and a pair of sweat pants?" }, { "from": "gpt", "value": "The answer is 34" } ], "clip_score": 37.1875, "loss": 2.6771533489227295 }, { "id": "identity_172950", "image": "TabMWP/images/7745.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of red buttons per bag?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 22.0, "loss": 3.1363391876220703 }, { "id": "identity_240307", "image": "TQA/images/abc_question_images/layers_of_atmosphere_18101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the vertical distance between the highest and the lowest letters?" }, { "from": "gpt", "value": "The answer is the vertical distance between P and C" } ], "clip_score": 28.046875, "loss": 2.2963433265686035 }, { "id": "identity_188511", "image": "VQA2.0/images/74090.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the catcher's mitt?" }, { "from": "gpt", "value": "The answer is All-Star" } ], "clip_score": 23.5625, "loss": 2.7081449031829834 }, { "id": "identity_201079", "image": "PMC-VQA/images/PMC9293144_dc24875-fig-0002_343806.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely function of the larger cell in this image?" }, { "from": "gpt", "value": "The answer is Likely a white blood cell (immune response)" } ], "clip_score": 29.265625, "loss": 2.679953098297119 }, { "id": "identity_252107", "image": "PMC-VQA/images/PMC8129590_F5_444179.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing histological features of the tissue sample, and how do they compare to the features of lung, brain, liver, and heart tissues?" }, { "from": "gpt", "value": "The answer is Brain tissue has neurons and glial cells, unlike lung, liver, and heart" } ], "clip_score": 28.125, "loss": 2.396766424179077 }, { "id": "identity_253899", "image": "A-OKVQA/images/427046.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cones are there in total on both sides of the path, and what is the average distance between each cone if the total length of the path is 50 meters?" }, { "from": "gpt", "value": "The answer is 2.5 meters" } ], "clip_score": 22.578125, "loss": 2.7921459674835205 }, { "id": "identity_57965", "image": "DVQA/images/bar_train_00168334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item had the highest sales in the 'line' store?" }, { "from": "gpt", "value": "The answer is vacuum" } ], "clip_score": 26.453125, "loss": 2.512392282485962 }, { "id": "identity_67812", "image": "ScienceQA/images/3617.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the molecular geometry of this molecule?" }, { "from": "gpt", "value": "The answer is Tetrahedral" } ], "clip_score": 30.96875, "loss": 1.4351418018341064 }, { "id": "identity_44207", "image": "DVQA/images/bar_train_00196838.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who preferred the 'tea' category in all objects?" }, { "from": "gpt", "value": "The answer is 23" } ], "clip_score": 26.421875, "loss": 2.484112024307251 }, { "id": "identity_52161", "image": "FigureQA/images/39900.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any other color present in the pie chart besides Orange and Dark Orchid?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 42.46875, "loss": 2.81437087059021 }, { "id": "identity_242056", "image": "AI2D/images/3608.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which label represents the structure that is directly superior to the Trachea and inferior to the Pharynx?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.890625, "loss": 3.5923449993133545 }, { "id": "identity_259911", "image": "IconQA/images/74854.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of empty squares in the frame?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 32.84375, "loss": 2.3462610244750977 }, { "id": "identity_255399", "image": "PMC-VQA/images/PMC7841137_F4_354878.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the key characteristics of a T2-weighted MRI image, and how do they help in differentiating between various brain tissues and pathologies?" }, { "from": "gpt", "value": "The answer is Bright fluids, useful for identifying edema and inflammation" } ], "clip_score": 34.875, "loss": 3.597224712371826 }, { "id": "identity_155260", "image": "ScienceQA/images/5334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many white dots are there on the flower pot?" }, { "from": "gpt", "value": "The answer is 13" } ], "clip_score": 29.84375, "loss": 2.198709487915039 }, { "id": "identity_198533", "image": "TextVQA/images/a6d268ca694fb15e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the brand of the television?" }, { "from": "gpt", "value": "The answer is Philips" } ], "clip_score": 26.625, "loss": 2.7906901836395264 }, { "id": "identity_252904", "image": "A-OKVQA/images/425772.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the item on the floor that is in front of the boat that is behind the red boat, and what is the shape of the item on the floor that is in front of the boat that is behind the red boat?" }, { "from": "gpt", "value": "The answer is Orange, round" } ], "clip_score": 19.328125, "loss": 3.511280059814453 }, { "id": "identity_32865", "image": "A-OKVQA/images/53542.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What color are the rectangular bricks on the very bottom of the tower?\nChoices:\n(A) red\n(B) white\n(C) gray\n(D) blue" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.0, "loss": 4.045264720916748 }, { "id": "identity_27517", "image": "DocVQA/images/fqvw0217_35.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is estrogen effective in treating Hot flushes vs Placebo?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 24.859375, "loss": 2.1014721393585205 }, { "id": "identity_168807", "image": "IconQA/images/37135.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of marbles?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 28.5, "loss": 1.7321220636367798 }, { "id": "identity_171340", "image": "IconQA/images/97473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the spinner is spun 8 times, how many different outcomes can occur?" }, { "from": "gpt", "value": "The answer is 256" } ], "clip_score": 24.5625, "loss": 2.210202693939209 }, { "id": "identity_298305", "image": "ScienceQA/images/4338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which city is the capital of Missouri?\nChoices:\n(A) Saint Louis\n(B) Tallahassee\n(C) Jefferson City\n(D) Des Moines" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.734375, "loss": 3.878429889678955 }, { "id": "identity_229953", "image": "CLEVR-Math/images/CLEVR_train_017607.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many brown things are there, and what is the total number of objects in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 23.9375, "loss": 3.916023015975952 }, { "id": "identity_327441", "image": "A-OKVQA/images/195296.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the bananas doing on this yellow cloth?\nChoices:\n(A) being dried\n(B) being cooked\n(C) being sold\n(D) being eaten" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.546875, "loss": 4.458514213562012 }, { "id": "identity_145708", "image": "DocVQA/images/gtxb0228_19.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What position did William J. Darby hold in 1941-42?" }, { "from": "gpt", "value": "The answer is National Research Council Fellow in the Medical Sciences" } ], "clip_score": 27.640625, "loss": 2.408468008041382 }, { "id": "identity_197348", "image": "A-OKVQA/images/52966.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of pattern is on the woman's clothing on the left?" }, { "from": "gpt", "value": "The answer is Floral pattern" } ], "clip_score": 10.9375, "loss": 2.924264907836914 }, { "id": "identity_39547", "image": "PlotQA/images/83735.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year does the healthcare expenditure appear to be the highest?" }, { "from": "gpt", "value": "The answer is Around 2005" } ], "clip_score": 24.3125, "loss": 2.438138723373413 }, { "id": "identity_281027", "image": "TabMWP/images/12839.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of the number of comic strips that Dean paid attention to in the last 5 days?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 34.1875, "loss": 2.7534563541412354 }, { "id": "identity_317646", "image": "DVQA/images/bar_train_00188483.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the third bar from the left in each set?" }, { "from": "gpt", "value": "The answer is press" } ], "clip_score": 16.8125, "loss": 4.309089183807373 }, { "id": "identity_101297", "image": "IconQA/images/71236.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fraction of the spinner is colored pink?" }, { "from": "gpt", "value": "The answer is 1/8" } ], "clip_score": 33.5625, "loss": 1.578139305114746 }, { "id": "identity_20858", "image": "VQA2.0/images/247049.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is casting a shadow over the people?" }, { "from": "gpt", "value": "The answer is trees" } ], "clip_score": 15.765625, "loss": 3.0854010581970215 }, { "id": "identity_63109", "image": "TQA/images/question_images/human_system_digestive_3783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled at the top of the digestive system?" }, { "from": "gpt", "value": "The answer is Esophagus" } ], "clip_score": 34.53125, "loss": 2.000603437423706 }, { "id": "identity_150811", "image": "ScienceQA/images/4923.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is located directly north of the highlighted continent?" }, { "from": "gpt", "value": "The answer is Panama" } ], "clip_score": 27.296875, "loss": 3.5147459506988525 }, { "id": "identity_88067", "image": "Geometry3K/images/1162.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of the figure?" }, { "from": "gpt", "value": "The answer is 36" } ], "clip_score": 34.25, "loss": 1.9137336015701294 }, { "id": "identity_139728", "image": "DVQA/images/bar_train_00159429.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many datasets have an accuracy of 60% or above for the 'self' algorithm?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 35.96875, "loss": 2.415361166000366 }, { "id": "identity_338072", "image": "ScienceQA/images/9996.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: After the Akkadian Empire ended, many cities and empires tried to control Mesopotamia. Around the 1790s BCE, which group started controlling Mesopotamia?\nChoices:\n(A) the Neo-Sumerian Empire\n(B) the Elamite Empire\n(C) the Babylonian Empire\n(D) the Akkadian Empire" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 32.40625, "loss": 4.006075859069824 }, { "id": "identity_59899", "image": "FigureQA/images/93778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what point on the x-axis does Steel Blue intersect with Medium Seafoam?" }, { "from": "gpt", "value": "The answer is 60" } ], "clip_score": 22.171875, "loss": 2.0262458324432373 }, { "id": "identity_60485", "image": "DocVQA/images/mjph0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of report is presented in the image?" }, { "from": "gpt", "value": "The answer is Water Analysis Report" } ], "clip_score": 24.234375, "loss": 3.212242603302002 }, { "id": "identity_262866", "image": "Geometry3K/images/1368.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If points S, T, and U are the midpoints of line segments DE, EF, and DF respectively, what is the value of y?\nChoices:\n(A) 2.9\n(B) 4.0\n(C) 4.6\n(D) 5.8" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 29.734375, "loss": 2.748656749725342 }, { "id": "identity_39205", "image": "DVQA/images/bar_train_00176940.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value for 'hurry' the same for all three elements?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 18.765625, "loss": 3.284593105316162 }, { "id": "identity_294312", "image": "IconQA/images/42266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which picture corresponds to the ninth image in the sequence, given that the first picture is a house? Choices are:\n(A) paw\n(B) house\n(C) dog" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.296875, "loss": 4.626431941986084 }, { "id": "identity_249995", "image": "A-OKVQA/images/562575.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the likely destination of the bus, and how can you determine this based on the image?" }, { "from": "gpt", "value": "The answer is South Base" } ], "clip_score": 27.65625, "loss": 6.147707462310791 }, { "id": "identity_259694", "image": "ScienceQA/images/17302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Calculate the ratio of the magnetic forces between the magnets in Pair 1 and Pair 2 if the distance between the magnets in each pair is halved. Assume the magnetic force between two magnets is inversely proportional to the square of the distance between them" }, { "from": "gpt", "value": "The answer is The ratio is 1:1" } ], "clip_score": 19.90625, "loss": 2.4545881748199463 }, { "id": "identity_50736", "image": "DocVQA/images/ssjw0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the address of the location mentioned at the bottom of the report?" }, { "from": "gpt", "value": "The answer is 2 Palo Alto Square - Palo Alto, California 94304" } ], "clip_score": 14.53125, "loss": 1.4392062425613403 }, { "id": "identity_313802", "image": "AI2D/images/1407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is represented by this?\nChoices:\n(A) none of the above\n(B) ground level\n(C) subsoil\n(D) bedrock" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 26.140625, "loss": 3.553359270095825 }, { "id": "identity_314886", "image": "VQA2.0/images/9321.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is this structure?" }, { "from": "gpt", "value": "The answer is red" } ], "clip_score": 19.671875, "loss": 3.3641059398651123 }, { "id": "identity_127350", "image": "Super-CLEVR/images/superCLEVR_new_000518.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the object directly beneath the yellow dirtbike?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 22.21875, "loss": 3.492150068283081 }, { "id": "identity_159561", "image": "GeoQA+/images/1317.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 30 degrees" } ], "clip_score": 31.203125, "loss": 1.9233348369598389 }, { "id": "identity_300344", "image": "DVQA/images/bar_train_00166019.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of extent in this?" }, { "from": "gpt", "value": "The answer is 8" } ], "clip_score": 19.015625, "loss": 2.864067316055298 }, { "id": "identity_79623", "image": "GeoQA+/images/4782.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ∠AOD = 130° in the given rectangle ABCD, what is the measure of ∠OBC?" }, { "from": "gpt", "value": "The answer is 45°" } ], "clip_score": 35.03125, "loss": 1.8422201871871948 }, { "id": "identity_39329", "image": "DVQA/images/bar_train_00040366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more stars or circles in the 'victim' category?" }, { "from": "gpt", "value": "The answer is Stars" } ], "clip_score": 25.453125, "loss": 3.334805965423584 }, { "id": "identity_261786", "image": "PMC-VQA/images/PMC5972190_F6_319548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which structures are depicted in (A) next to the superior branch of the vestibular nerve?" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 21.671875, "loss": 2.5403552055358887 }, { "id": "identity_105270", "image": "A-OKVQA/images/416308.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are sitting on the beach?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 22.40625, "loss": 2.2327513694763184 }, { "id": "identity_21146", "image": "Geometry3K/images/1437.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of a regular pentagon with a perimeter of 40 centimeters.\nChoices:\n(A) 88\n(B) 110\n(C) 132\n(D) 160" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.890625, "loss": 2.349931240081787 }, { "id": "identity_97893", "image": "CLEVR-Math/images/CLEVR_train_043123.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cubes are present in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 21.03125, "loss": 2.4014012813568115 }, { "id": "identity_222441", "image": "Super-CLEVR/images/superCLEVR_new_000939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more airplanes than land vehicles in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.109375, "loss": 3.145179033279419 }, { "id": "identity_6328", "image": "TQA/images/question_images/cycle_rock_6741.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What happens if you apply heat and pressure to igneous rock?\nChoices:\n(A) Turn to magma\n(B) Turns to sediment\n(C) Turns to sedimentary rock\n(D) Turns to metamorphic rock" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 35.53125, "loss": 3.869929790496826 }, { "id": "identity_316269", "image": "UniGeo/images/1473.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,直线l_{1}∥l_{2}∥l_{3}于点A、B、C,直线DF分别交l_{1}、l_{2}、l_{3}于点D、E、F,AC与DF相交于点H,如果AB=5,BH=1,CH=2,那么\\frac{EF}{DE}的值等于多少?\n\nChoices:\n(A) \\frac{1}{5}\n(B) \\frac{1}{3}\n(C) \\frac{2}{5}\n(D) \\frac{3}{5}" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 28.0, "loss": 2.3349032402038574 }, { "id": "identity_171058", "image": "IconQA/images/106747.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many purple ovals are there in total?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 37.96875, "loss": 1.8980653285980225 }, { "id": "identity_283997", "image": "DocVQA/images/yjpg0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who holds the position of Dean at the School of Pharmacy, Medical College of Virginia?" }, { "from": "gpt", "value": "The answer is r. b. smith, jr." } ], "clip_score": 21.109375, "loss": 2.2537758350372314 }, { "id": "identity_317526", "image": "TabMWP/images/21618.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more points did one person score than another in the trivia game?" }, { "from": "gpt", "value": "The answer is 168" } ], "clip_score": 32.90625, "loss": 2.2633872032165527 }, { "id": "identity_299132", "image": "IconQA/images/31481.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Estimate the length of the line using paper clips. The line is approximately (_) paper clips long." }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 29.421875, "loss": 2.585031509399414 }, { "id": "identity_286973", "image": "ScienceQA/images/15656.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likelihood that a Labrador retriever resulting from this cross will exhibit homozygous recessive fur color gene expression?\nChoices:\n(A) 1/4\n(B) 2/4\n(C) 0/4\n(D) 3/4\n(E) 4/4" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 19.90625, "loss": 3.5787975788116455 }, { "id": "identity_9694", "image": "FigureQA/images/52801.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Orange Red greater than Green Yellow?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 15.4296875, "loss": 3.648670196533203 }, { "id": "identity_117900", "image": "VQA-AS/images/17137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many books are stacked on the floor?" }, { "from": "gpt", "value": "The answer is Three" } ], "clip_score": 15.8984375, "loss": 4.848727226257324 }, { "id": "identity_120966", "image": "A-OKVQA/images/525024.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the kite closest to the top of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 24.515625, "loss": 3.688633680343628 }, { "id": "identity_204367", "image": "A-OKVQA/images/109008.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are wearing hats?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 15.4375, "loss": 2.6661903858184814 }, { "id": "identity_298181", "image": "PMC-VQA/images/PMC7891795_FIG1_370421.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which sinus is the evidence of sinus disease shown in the CT scan?\nChoices:\n(A) Right frontal sinus\n(B) Left sphenoid sinus\n(C) Right sphenoid sinus\n(D) Left maxillary sinus" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.796875, "loss": 4.420820713043213 }, { "id": "identity_186848", "image": "TextVQA/images/b9ad2c53a7a723b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color used in the top right corner of the image?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 12.953125, "loss": 2.8441202640533447 }, { "id": "identity_321905", "image": "Geometry3K/images/1219.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of b using the properties of the parallelogram.\nChoices:\n(A) 2\n(B) 3\n(C) 14\n(D) 17" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 32.90625, "loss": 2.3476383686065674 }, { "id": "identity_223436", "image": "TextVQA/images/7972f78d439a79e8.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the birthplace of the player displayed on the screen?" }, { "from": "gpt", "value": "The answer is Móstoles (Spain)" } ], "clip_score": 27.640625, "loss": 2.3124148845672607 }, { "id": "identity_333945", "image": "A-OKVQA/images/16578.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of vehicle is the one with 42 on it?\nChoices:\n(A) convertible\n(B) taxi\n(C) shipping\n(D) prison" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.7890625, "loss": 4.239861488342285 }, { "id": "identity_209505", "image": "PMC-VQA/images/PMC1283984_F4_3833.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What staining technique is likely used to achieve the coloration seen in this microscopic image?" }, { "from": "gpt", "value": "The answer is Hematoxylin and Eosin (H&E) staining" } ], "clip_score": 31.78125, "loss": 0.9602605700492859 }, { "id": "identity_154156", "image": "AI2D/images/398.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the tertiary consumer in this food chain?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.375, "loss": 2.960724353790283 }, { "id": "identity_109880", "image": "VQA-RAD/images/synpic40426.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality is used to obtain this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 33.75, "loss": 2.8053741455078125 }, { "id": "identity_238659", "image": "PlotQA/images/116269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year was the difference between the percentage of revenue generated by grants and the percentage of revenue generated by taxes the maximum?" }, { "from": "gpt", "value": "The answer is 2002" } ], "clip_score": 29.671875, "loss": 1.8516314029693604 }, { "id": "identity_173947", "image": "IconQA/images/73296.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove all the blue circles, how many circles will remain?" }, { "from": "gpt", "value": "The answer is 40" } ], "clip_score": 30.296875, "loss": 2.1811063289642334 }, { "id": "identity_69180", "image": "AI2D/images/3138.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the leaf structure is responsible for photosynthesis?" }, { "from": "gpt", "value": "The answer is Palisade Mesophyll" } ], "clip_score": 25.3125, "loss": 1.5529563426971436 }, { "id": "identity_260508", "image": "FigureQA/images/5424.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the Dark Orchid section to the Orange section?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.0, "loss": 2.6939220428466797 }, { "id": "identity_37178", "image": "PMC-VQA/images/PMC4645184_f6_443921.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the color used to stain YAP in the image? \nChoices:\n(A) Yellow \n(B) Blue \n(C) Red \n(D) Green " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.3828125, "loss": 3.8088021278381348 }, { "id": "identity_100042", "image": "TabMWP/images/19359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item is the least expensive and by how much compared to the next least expensive item?" }, { "from": "gpt", "value": "The answer is Flannel sheet, $0.97" } ], "clip_score": 33.1875, "loss": 2.8026351928710938 }, { "id": "identity_27248", "image": "TabMWP/images/28581.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The table shows a function. Is the function linear or nonlinear?\nChoices:\n(A) linear\n(B) nonlinear" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 25.875, "loss": 4.54069185256958 }, { "id": "identity_16247", "image": "PMC-VQA/images/PMC8110773_Fig3_436844.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are some parts of the brain mentioned in the caption? \nChoices:\n(A) hippocampus (hc), \n(B) nucleus accumbens (nac), \n(C) choroid plexus (cp), \n(D) cerebellum (cb) " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.640625, "loss": 3.207401990890503 }, { "id": "identity_105713", "image": "A-OKVQA/images/11968.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bridle on the horse?" }, { "from": "gpt", "value": "The answer is Red and black" } ], "clip_score": 29.09375, "loss": 3.0588538646698 }, { "id": "identity_72690", "image": "ScienceQA/images/3267.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beans are visible in the image?" }, { "from": "gpt", "value": "The answer is Seven" } ], "clip_score": 31.125, "loss": 4.29918909072876 }, { "id": "identity_63007", "image": "ScienceQA/images/2140.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which pair of magnets will experience a repulsive force?" }, { "from": "gpt", "value": "The answer is Pair 2" } ], "clip_score": 23.46875, "loss": 1.7357800006866455 }, { "id": "identity_269899", "image": "MapQA/images/map_788.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the numerical worth of Utah?" }, { "from": "gpt", "value": "The answer is 73.8%-95.8%" } ], "clip_score": 24.453125, "loss": 2.4493746757507324 }, { "id": "identity_32584", "image": "PMC-VQA/images/PMC6662306_pbi13074-fig-0003_15391.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which element is not shown in the image? \nChoices:\n(A) Fe \n(B) Zn \n(C) Na \n(D) None of the above " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 17.546875, "loss": 3.7253470420837402 }, { "id": "identity_160436", "image": "GeoQA+/images/2108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If angle 1 measures 50 degrees and line a is parallel to line b, what is the measure of angle BAC?" }, { "from": "gpt", "value": "The answer is 40 degrees" } ], "clip_score": 30.453125, "loss": 1.942492961883545 }, { "id": "identity_12862", "image": "TextVQA/images/53135d87a1768a41.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what gender is the person listed?" }, { "from": "gpt", "value": "The answer is male" } ], "clip_score": 17.734375, "loss": 3.6975607872009277 }, { "id": "identity_24089", "image": "TextVQA/images/104928ae82c7a41b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what number is the small hand pointing too?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 11.6328125, "loss": 2.265106201171875 }, { "id": "identity_215893", "image": "VQA2.0/images/563329.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the kite closest to the girl in pink?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 25.703125, "loss": 3.728607177734375 }, { "id": "identity_251870", "image": "VizWiz/images/VizWiz_train_00012248.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many total ounces of shredded mango are there in the three visible jars combined?" }, { "from": "gpt", "value": "The answer is 39 oz" } ], "clip_score": 22.46875, "loss": 1.9830355644226074 }, { "id": "identity_191008", "image": "VQA2.0/images/412698.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bird's plumage?" }, { "from": "gpt", "value": "The answer is Grey" } ], "clip_score": 23.75, "loss": 4.125683784484863 }, { "id": "identity_321344", "image": "PMC-VQA/images/PMC7574238_Fig1_278451.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which specific part of the brain was mainly involved in the hemorrhagic infarction?\nChoices:\n(A) Frontal lobe \n(B) Parietal lobe \n(C) Temporal lobe \n(D) Occipital lobe" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.984375, "loss": 3.8397469520568848 }, { "id": "identity_53742", "image": "FigureQA/images/69164.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate value of the Green Yellow line at the 50 x-axis label?" }, { "from": "gpt", "value": "The answer is 63" } ], "clip_score": 22.859375, "loss": 2.7640531063079834 }, { "id": "identity_85116", "image": "GeoQA+/images/1353.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of segment AG, where G is the point of intersection between diagonal BD and side AB?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 33.96875, "loss": 2.9706897735595703 }, { "id": "identity_229527", "image": "ChartQA/images/382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total percentage of registered voters who believe it is at least somewhat important that people who are NOT legally qualified to vote are prevented from casting a ballot?" }, { "from": "gpt", "value": "The answer is 84%" } ], "clip_score": 32.78125, "loss": 1.783639669418335 }, { "id": "identity_126941", "image": "PMC-VQA/images/PMC6994645_Fig1_109414.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any label indicating the patient's position during the imaging?" }, { "from": "gpt", "value": "The answer is No label indicating patient's position" } ], "clip_score": 31.390625, "loss": 2.8376171588897705 }, { "id": "identity_307047", "image": "PMC-VQA/images/PMC7377830_f10_218656.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the histogram plot used to analyze?\nChoices:\n(A) muscular structure analysis \n(B) trabecular structure analysis \n(C) bone mass analysis \n(D) bone volume analysis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.5546875, "loss": 4.301033020019531 }, { "id": "identity_184183", "image": "A-OKVQA/images/69486.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What beverage brand is prominently displayed on the sign in the image?" }, { "from": "gpt", "value": "The answer is Coca-Cola" } ], "clip_score": 16.5, "loss": 1.7066407203674316 }, { "id": "identity_263950", "image": "AI2D/images/117.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the letter B represent in the following options: (A) North Pole, (B) Moon, (C) Equator, (D) South Pole?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 28.9375, "loss": 4.331268310546875 }, { "id": "identity_248740", "image": "TextVQA/images/5a59b71a8697b3aa.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many different sponsors can be identified in the image, and which categories do they belong to (e.g., insurance, food, transportation)?" }, { "from": "gpt", "value": "The answer is Twelve sponsors, categories include insurance, entertainment, media, food, beverage, airline, transportation, fuel, automobile, furniture, electronics, and banking" } ], "clip_score": 17.71875, "loss": 2.2141122817993164 }, { "id": "identity_250939", "image": "Geometry3K/images/815.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the values of x and y so that the quadrilateral is a parallelogram" }, { "from": "gpt", "value": "The answer is x = 8, y = 6" } ], "clip_score": 37.5, "loss": 1.6618794202804565 }, { "id": "identity_16476", "image": "IconQA/images/28358.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: There are 3 pinecones in the top row. How many pinecones are in the bottom row?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 40.28125, "loss": 2.500082492828369 }, { "id": "identity_224550", "image": "VizWiz/images/VizWiz_train_00012104.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total length of the LEGO piece in centimeters, and how many 1x1 LEGO bricks would be needed to match this length if each 1x1 LEGO brick is 0.8 cm long?" }, { "from": "gpt", "value": "The answer is 7.62 cm and 10 bricks" } ], "clip_score": 29.265625, "loss": 2.1411943435668945 }, { "id": "identity_58436", "image": "DocVQA/images/rgfl0225_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the sponsor of the study?" }, { "from": "gpt", "value": "The answer is R.J. Reynolds Tobacco Company" } ], "clip_score": 11.828125, "loss": 1.4715734720230103 }, { "id": "identity_101145", "image": "IconQA/images/44863.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove 4 small circles, how many circles will remain?" }, { "from": "gpt", "value": "The answer is 11" } ], "clip_score": 30.15625, "loss": 2.071096658706665 }, { "id": "identity_35793", "image": "TabMWP/images/32911.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more does a cherry pie cost than a key lime pie?" }, { "from": "gpt", "value": "The answer is 1.87" } ], "clip_score": 35.15625, "loss": 2.2235710620880127 }, { "id": "identity_138298", "image": "DVQA/images/bar_train_00085178.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the 'moon' dataset more accurate than the 'mature' dataset for 'love'?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.46875, "loss": 3.0199966430664062 }, { "id": "identity_194601", "image": "Super-CLEVR/images/superCLEVR_new_001469.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many bicycles are there in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.765625, "loss": 2.4170713424682617 }, { "id": "identity_189268", "image": "A-OKVQA/images/68657.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What activity is the person engaged in?" }, { "from": "gpt", "value": "The answer is Playing video games" } ], "clip_score": 19.34375, "loss": 2.2057032585144043 }, { "id": "identity_282633", "image": "VQA2.0/images/103352.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the spigot made of copper?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 14.9453125, "loss": 4.081089019775391 }, { "id": "identity_103465", "image": "A-OKVQA/images/484208.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many sheep are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 26.71875, "loss": 2.1882646083831787 }, { "id": "identity_293293", "image": "TextVQA/images/d0909d962b0b0513.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which word appears first on the sign on the left?" }, { "from": "gpt", "value": "The answer is jogunes" } ], "clip_score": 17.546875, "loss": 7.3137431144714355 }, { "id": "identity_223972", "image": "PMC-VQA/images/PMC8032970_F3_412532.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique was used to detect remission in the patients, and what are the levels of CXCL13 and CXCL9 in pg/ml?" }, { "from": "gpt", "value": "The answer is MRI, CXCL13: 616 pg/ml, CXCL9: 239 pg/ml" } ], "clip_score": 36.28125, "loss": 1.0089212656021118 }, { "id": "identity_114209", "image": "PMC-VQA/images/PMC7788043_FIG3_335994.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the abnormality seen in the image?" }, { "from": "gpt", "value": "The answer is There is a mass in the left maxillary sinus" } ], "clip_score": 29.046875, "loss": 2.898131847381592 }, { "id": "identity_303842", "image": "Geometry3K/images/2014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the value of x.\nChoices:\n(A) 11.4\n(B) 22.6\n(C) 32.3\n(D) 43.2" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.234375, "loss": 2.905067205429077 }, { "id": "identity_178297", "image": "TabMWP/images/30739.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many children weighed more than 40 pounds?" }, { "from": "gpt", "value": "The answer is 3 children" } ], "clip_score": 31.5625, "loss": 2.3605306148529053 }, { "id": "identity_77078", "image": "AI2D/images/3328.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Golgi body\"?" }, { "from": "gpt", "value": "The answer is Modifies, sorts, and packages proteins and lipids" } ], "clip_score": 25.125, "loss": 1.6029045581817627 }, { "id": "identity_24176", "image": "DVQA/images/bar_train_00141930.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value of lumber in home larger than the value of bank in self?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 23.59375, "loss": 3.7846646308898926 }, { "id": "identity_170328", "image": "TabMWP/images/9406.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many more minutes did Mandy practice on Saturday compared to Thursday?" }, { "from": "gpt", "value": "The answer is 4 minutes" } ], "clip_score": 33.96875, "loss": 2.4466564655303955 }, { "id": "identity_290063", "image": "TextVQA/images/81fc54d6ff1b4cf6.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What did the yellow car say?" }, { "from": "gpt", "value": "The answer is taxi" } ], "clip_score": 12.6796875, "loss": 3.160710334777832 }, { "id": "identity_276646", "image": "PMC-VQA/images/PMC7189031_os12665-fig-0004_160452.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which surgical procedure is being depicted in the radiographic image?\nChoices:\n(A) Hip dislocation surgery\n(B) Arthroscopy\n(C) Transverse osteotomy combined with total hip arthroplasty (THA)\n(D) Craniotomy." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 27.96875, "loss": 3.92297101020813 }, { "id": "identity_303201", "image": "PMC-VQA/images/PMC9005803_F1_252228.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the yellow arrow indicate in this image?\nChoices:\n(A) A normal ventricle\n(B) Brain atrophy\n(C) Enlarged ventricle in the acute stage\n(D) Changes after cocktail therapy" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 29.359375, "loss": 4.0799970626831055 }, { "id": "identity_104289", "image": "PMC-VQA/images/PMC7149892_Fig3_149735.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the magnification level of the image?" }, { "from": "gpt", "value": "The answer is 11,000x" } ], "clip_score": 23.984375, "loss": 1.8153377771377563 }, { "id": "identity_313502", "image": "VQA-RAD/images/synpic17145.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the diaphragm is elevated?" }, { "from": "gpt", "value": "The answer is right" } ], "clip_score": 24.796875, "loss": 4.356619834899902 }, { "id": "identity_68395", "image": "ScienceQA/images/5395.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tertiary consumers are shown in the image?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 13.390625, "loss": 2.508969306945801 }, { "id": "identity_147591", "image": "DocVQA/images/ftgj0223_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main product/service group code?" }, { "from": "gpt", "value": "The answer is 5.15E+18" } ], "clip_score": 18.390625, "loss": 3.4733288288116455 }, { "id": "identity_179266", "image": "CLEVR-Math/images/CLEVR_train_016512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract 1 red shiny block. How many red shiny blocks are left?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 21.78125, "loss": 2.2503883838653564 }, { "id": "identity_234077", "image": "Super-CLEVR/images/superCLEVR_new_000416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the materials of all the objects that are in front of the car and behind the airliner?" }, { "from": "gpt", "value": "The answer is metal and rubber" } ], "clip_score": 28.609375, "loss": 2.890076160430908 }, { "id": "identity_44273", "image": "FigureQA/images/58308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the sum of the values for Dark Red and Peru?" }, { "from": "gpt", "value": "The answer is 50" } ], "clip_score": 22.140625, "loss": 2.4628639221191406 }, { "id": "identity_167267", "image": "TabMWP/images/13723.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of Nobel Prize winners per country in the 1970s?" }, { "from": "gpt", "value": "The answer is 4.2" } ], "clip_score": 40.46875, "loss": 2.1148102283477783 }, { "id": "identity_191060", "image": "VQA2.0/images/267731.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the platform the cow statue is standing on?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 29.59375, "loss": 3.4914157390594482 }, { "id": "identity_120683", "image": "A-OKVQA/images/567616.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many halves of fruit are there in the bowl?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 24.875, "loss": 2.0257022380828857 }, { "id": "identity_218666", "image": "PMC-VQA/images/PMC9195320_Fig2_310469.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging modality is used to capture this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 32.9375, "loss": 2.474302053451538 }, { "id": "identity_184979", "image": "PMC-VQA/images/PMC6004303_fig-4_329874.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of cells are predominantly visible in this micrograph?" }, { "from": "gpt", "value": "The answer is Neuron cells" } ], "clip_score": 28.390625, "loss": 2.6419363021850586 }, { "id": "identity_81581", "image": "GeoQA+/images/4169.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the circle with center O, if AB is the diameter and ∠ABD = 58°, what is the measure of ∠ACD?" }, { "from": "gpt", "value": "The answer is 58°" } ], "clip_score": 35.34375, "loss": 1.8533599376678467 }, { "id": "identity_164337", "image": "UniGeo/images/810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In right triangle ABC, with angle BAC equal to 90 degrees, AC equals 3, and AB equals 4, what is the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 34.46875, "loss": 2.2661941051483154 }, { "id": "identity_110512", "image": "VQA2.0/images/262260.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many benches are visible in the image?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 18.171875, "loss": 2.4691340923309326 }, { "id": "identity_50505", "image": "PlotQA/images/58407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary completion rate for Benin in 1983?" }, { "from": "gpt", "value": "The answer is Approximately 40%" } ], "clip_score": 26.6875, "loss": 2.2704784870147705 }, { "id": "identity_305547", "image": "Super-CLEVR/images/superCLEVR_new_000716.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the large object behind the red motorbike shaped like the brown metallic object?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 16.78125, "loss": 5.1177473068237305 }, { "id": "identity_202007", "image": "PMC-VQA/images/PMC7545596_acn351170-fig-0001_268760.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique was used to obtain this image?" }, { "from": "gpt", "value": "The answer is MRI" } ], "clip_score": 33.3125, "loss": 2.3784067630767822 }, { "id": "identity_165582", "image": "GeoQA+/images/3901.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the coordinates of point E are (x, y), find the value of x" }, { "from": "gpt", "value": "The answer is 7.5" } ], "clip_score": 31.890625, "loss": 2.3720946311950684 }, { "id": "identity_335387", "image": "PMC-VQA/images/PMC7367244_Fig1_215713.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which imaging method is most suitable for assessing the extent of the tumor?\nChoices:\n(A) Radiograph \n(B) CT scan \n(C) MRI \n(D) PET scan" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 26.765625, "loss": 4.0872955322265625 }, { "id": "identity_245046", "image": "UniGeo/images/139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线a,b被直线c所截,且a//b,若∠1=55°,求∠3的度数。" }, { "from": "gpt", "value": "The answer is 125°" } ], "clip_score": 26.109375, "loss": 1.7624566555023193 }, { "id": "identity_323749", "image": "PMC-VQA/images/PMC8099424_fig1s3_433955.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the probe used for labeling in this image?\nChoices:\n(A) GFP\n(B) Apcdd1\n(C) APC\n(D) CDH2" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 20.109375, "loss": 3.8644402027130127 }, { "id": "identity_44375", "image": "ChartQA/images/OECD_GENERAL_GOVERNMENT_DEFICIT_BRA_CHL_IDN_JPN_USA_000139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between 1990 and 2000, which country had the most consistent government deficit trend?" }, { "from": "gpt", "value": "The answer is Indonesia" } ], "clip_score": 33.90625, "loss": 3.14418888092041 }, { "id": "identity_47269", "image": "FigureQA/images/70540.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many data points does Rosy Brown have above the value of 80 on the y-axis?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 27.421875, "loss": 2.246088743209839 }, { "id": "identity_68293", "image": "ScienceQA/images/20101.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the process called that leads from fertilized cone to seed?" }, { "from": "gpt", "value": "The answer is Seed dispersal" } ], "clip_score": 24.515625, "loss": 3.5024309158325195 }, { "id": "identity_308398", "image": "DocVQA/images/yncd0227_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is mentioned in the header of this document?" }, { "from": "gpt", "value": "The answer is chcop agenda" } ], "clip_score": 21.75, "loss": 5.580977439880371 }, { "id": "identity_306480", "image": "ScienceQA/images/8577.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Minneapolis\n(B) Des Moines\n(C) Saint Paul\n(D) Indianapolis" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.453125, "loss": 4.507691383361816 }, { "id": "identity_220363", "image": "A-OKVQA/images/409906.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of establishment is advertised on the billboard with the green background?" }, { "from": "gpt", "value": "The answer is A restaurant" } ], "clip_score": 15.1171875, "loss": 4.093342304229736 }, { "id": "identity_40203", "image": "FigureQA/images/91917.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many curves in the graph have a dotted line pattern?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 23.359375, "loss": 2.6702072620391846 }, { "id": "identity_312007", "image": "ChartQA/images/multi_col_20338.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of these stores from 2017 to 2018?" }, { "from": "gpt", "value": "The answer is 89.5" } ], "clip_score": 21.4375, "loss": 2.1555399894714355 }, { "id": "identity_221285", "image": "VQA2.0/images/260181.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many visible water bottles are on the floor?" }, { "from": "gpt", "value": "The answer is One water bottle" } ], "clip_score": 17.265625, "loss": 3.8719375133514404 }, { "id": "identity_98530", "image": "TabMWP/images/8819.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On which day did the restaurant improve its food preparation time the most compared to the previous day?" }, { "from": "gpt", "value": "The answer is Saturday" } ], "clip_score": 34.09375, "loss": 3.0036022663116455 }, { "id": "identity_68355", "image": "TQA/images/question_images/human_system_eye_6150.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What fills the space labeled \"Posterior cavity\"?" }, { "from": "gpt", "value": "The answer is Vitreous gel" } ], "clip_score": 30.828125, "loss": 1.6806631088256836 }, { "id": "identity_19438", "image": "TQA/images/question_images/seasons_1042.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: From the diagram, what comes after vernal equinox march 21/22?\nChoices:\n(A) autumnal equinox september 22/23\n(B) summer solstice june 20/21\n(C) winter solstice december 21/22\n(D) sun" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 41.875, "loss": 3.8195061683654785 }, { "id": "identity_17970", "image": "PlotQA/images/128545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label of the 3rd group of bars from the left ?" }, { "from": "gpt", "value": "The answer is 2006" } ], "clip_score": 19.28125, "loss": 2.9026942253112793 }, { "id": "identity_306550", "image": "TextVQA/images/d6d0a6b7e81a519b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of phone?" }, { "from": "gpt", "value": "The answer is htc" } ], "clip_score": 30.046875, "loss": 3.1103391647338867 }, { "id": "identity_73627", "image": "TQA/images/abc_question_images/tides_10149.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What does the label 'F' represent on the Earth?" }, { "from": "gpt", "value": "The answer is The North Pole" } ], "clip_score": 32.96875, "loss": 2.7234926223754883 }, { "id": "identity_118733", "image": "TextVQA/images/cf80e87381e2fc8e.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many artists are listed in the line-up for the event?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.578125, "loss": 2.220754384994507 }, { "id": "identity_124946", "image": "PMC-VQA/images/PMC7943055_FIG4_386868.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is prominently featured in the center of this image?" }, { "from": "gpt", "value": "The answer is Knee joint" } ], "clip_score": 26.578125, "loss": 3.0301034450531006 }, { "id": "identity_45833", "image": "FigureQA/images/80571.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of Cyan at the 50 mark on the x-axis label?" }, { "from": "gpt", "value": "The answer is 99.2" } ], "clip_score": 24.578125, "loss": 2.6061618328094482 }, { "id": "identity_271309", "image": "DVQA/images/bar_train_00141653.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the bars all one solid color with no patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 23.09375, "loss": 3.564121961593628 }, { "id": "identity_317871", "image": "GeoQA+/images/216.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,⊙O中,弦AC与BC是两条弦,∠C=35°,则∠O的度数是()\n选项:\n(A) 25°\n(B) 35°\n(C) 65°\n(D) 70°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 31.953125, "loss": 2.7972710132598877 }, { "id": "identity_111679", "image": "TextVQA/images/c9b4f3b3af0f4f03.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the watch displaying time in 12-hour or 24-hour format?" }, { "from": "gpt", "value": "The answer is 24-hour format" } ], "clip_score": 20.6875, "loss": 1.6473333835601807 }, { "id": "identity_32583", "image": "MapQA/images/map_7090.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the lowest value in the USA ?" }, { "from": "gpt", "value": "The answer is 39.6%-43.4%" } ], "clip_score": 24.125, "loss": 2.101907730102539 }, { "id": "identity_271947", "image": "UniGeo/images/1427.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, PQ, PR, and AB are tangents to circle O, with points Q, R, and S as the points of tangency. If ∠APB = 40°, what is the measure of ∠AOB? \n(A) 40°\n(B) 50°\n(C) 60°\n(D) 70°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 37.4375, "loss": 2.533433437347412 }, { "id": "identity_219295", "image": "TextVQA/images/ef2984aa210590d0.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals can be seen in the crowd within the illustration?" }, { "from": "gpt", "value": "The answer is An integer number" } ], "clip_score": 15.4140625, "loss": 4.513433933258057 }, { "id": "identity_315998", "image": "DVQA/images/bar_train_00168495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars has the smallest valued bar in the chart?" }, { "from": "gpt", "value": "The answer is mode" } ], "clip_score": 27.296875, "loss": 3.3770744800567627 }, { "id": "identity_97758", "image": "IconQA/images/7278.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the grid represents a coordinate plane, what is the coordinate of the top right corner of the letter B?" }, { "from": "gpt", "value": "The answer is (3,5)" } ], "clip_score": 27.078125, "loss": 1.6165785789489746 }, { "id": "identity_1812", "image": "Super-CLEVR/images/superCLEVR_new_000488.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number of rubber objects are big dirtbikes or big cyan tandem bikes?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 20.640625, "loss": 2.411708116531372 }, { "id": "identity_119049", "image": "Super-CLEVR/images/superCLEVR_new_001545.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is the purple object in the image?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 23.984375, "loss": 4.054449558258057 }, { "id": "identity_213661", "image": "PMC-VQA/images/PMC3056211_fig1_89835.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible branching of the artery near the area indicated by the white arrow?" }, { "from": "gpt", "value": "The answer is No visible branching" } ], "clip_score": 28.75, "loss": 3.7026631832122803 }, { "id": "identity_306652", "image": "AI2D/images/2229.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many stages of a butterfly's life cycle are shown?\nChoices:\n(A) 4\n(B) 6\n(C) 7\n(D) 5" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.3125, "loss": 2.9906489849090576 }, { "id": "identity_210925", "image": "PMC-VQA/images/PMC6000605_Figure1_328705.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the size of the sublobe mass in centimeters?" }, { "from": "gpt", "value": "The answer is Measurement in centimeters" } ], "clip_score": 16.421875, "loss": 2.5583856105804443 }, { "id": "identity_71241", "image": "AI2D/images/1683.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the lighter shadow that surrounds the umbra during an eclipse?" }, { "from": "gpt", "value": "The answer is Penumbra" } ], "clip_score": 26.109375, "loss": 2.3000545501708984 }, { "id": "identity_3997", "image": "DocVQA/images/qkcw0217_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who has assigned the document?" }, { "from": "gpt", "value": "The answer is Karen D. Mittleman, PhD" } ], "clip_score": 27.140625, "loss": 2.781493663787842 }, { "id": "identity_55042", "image": "DocVQA/images/rxhm0227_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the Professor Emeritus mentioned in the document?" }, { "from": "gpt", "value": "The answer is Rene J. Dubos, Ph.D" } ], "clip_score": 18.953125, "loss": 1.821848750114441 }, { "id": "identity_264532", "image": "FigureQA/images/43697.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Light Slate the median with the lowest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.34375, "loss": 4.152453422546387 }, { "id": "identity_55020", "image": "PlotQA/images/116269.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which year had the highest percentage of revenue generated by interest?" }, { "from": "gpt", "value": "The answer is 2003" } ], "clip_score": 29.96875, "loss": 1.7543656826019287 }, { "id": "identity_295637", "image": "A-OKVQA/images/385405.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What category of store is this?\nChoices:\n(A) shoe\n(B) grocery\n(C) pet\n(D) beauty" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 18.921875, "loss": 4.186955451965332 }, { "id": "identity_188765", "image": "PMC-VQA/images/PMC9354698_f2_364693.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image shown in panel A?" }, { "from": "gpt", "value": "The answer is Axial" } ], "clip_score": 24.359375, "loss": 3.317167282104492 }, { "id": "identity_218104", "image": "VizWiz/images/VizWiz_train_00017733.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many calories are there per serving according to the cereal box?" }, { "from": "gpt", "value": "The answer is 110 calories" } ], "clip_score": 30.28125, "loss": 1.5656805038452148 }, { "id": "identity_142249", "image": "MapQA/images/map_11003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any state with a percentage range of difficulties higher than 59.4%?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.765625, "loss": 3.005018711090088 }, { "id": "identity_287567", "image": "TabMWP/images/31796.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many tie racks at the department store have exactly 25 ties, as counted by the employee?" }, { "from": "gpt", "value": "The answer is 0" } ], "clip_score": 28.3125, "loss": 2.3840558528900146 }, { "id": "identity_69901", "image": "ScienceQA/images/5810.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which object in the image is likely to be the most flexible?" }, { "from": "gpt", "value": "The answer is Handkerchief" } ], "clip_score": 24.15625, "loss": 1.758184790611267 }, { "id": "identity_80056", "image": "GeoQA+/images/4598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Given triangle ABC with ∠C=90° and ∠B=15°, and DE perpendicular bisector of AB with foot at E, if AD=8cm, what is the length of CD?" }, { "from": "gpt", "value": "The answer is CD ≈ 7.7274 cm" } ], "clip_score": 32.59375, "loss": 2.0710041522979736 }, { "id": "identity_131169", "image": "VQA-AS/images/19868.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many plants are visible in the room?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 24.171875, "loss": 4.459424018859863 }, { "id": "identity_82029", "image": "GEOS/images/028.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the radius of the circle is 5 units, what is the length of the arc between the points of tangency?" }, { "from": "gpt", "value": "The answer is (65/9)π units" } ], "clip_score": 29.875, "loss": 2.77437686920166 }, { "id": "identity_135458", "image": "DocVQA/images/ghdw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date of the next meeting?" }, { "from": "gpt", "value": "The answer is July 27, 2000" } ], "clip_score": 18.734375, "loss": 1.694225549697876 }, { "id": "identity_19295", "image": "GeoQA+/images/2195.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,圆O的直径AB为4,点C在圆O上,∠ACB的平分线交圆O于点D,连接AD、BD,则AD的长等于()\nChoices:\n(A) 2\n(B) 3\n(C) 2√{2}\n(D) 2√{3}" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 28.859375, "loss": 2.388986587524414 }, { "id": "identity_52698", "image": "DocVQA/images/mfyk0226_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the article?" }, { "from": "gpt", "value": "The answer is WELCOME BACK, STRANGER" } ], "clip_score": 24.328125, "loss": 1.5444093942642212 }, { "id": "identity_172713", "image": "IconQA/images/40235.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the diamonds arranged in a symmetrical pattern?" }, { "from": "gpt", "value": "The answer is No, they are not symmetrical" } ], "clip_score": 30.578125, "loss": 2.564919948577881 }, { "id": "identity_292638", "image": "Super-CLEVR/images/superCLEVR_new_001359.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any purple objects remaining to the left of the blue chopper?" }, { "from": "gpt", "value": "The answer is True" } ], "clip_score": 26.0625, "loss": 5.3023810386657715 }, { "id": "identity_166132", "image": "TabMWP/images/26047.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of pizzas delivered over the past 6 days?" }, { "from": "gpt", "value": "The answer is 154" } ], "clip_score": 34.46875, "loss": 2.143160820007324 }, { "id": "identity_51468", "image": "FigureQA/images/89158.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Aqua between x-axis labels 0 and 100?" }, { "from": "gpt", "value": "The answer is Approximately 47.5" } ], "clip_score": 22.953125, "loss": 2.233410596847534 }, { "id": "identity_261856", "image": "VizWiz/images/VizWiz_train_00003948.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify what this is?" }, { "from": "gpt", "value": "The answer is bottle beer on table" } ], "clip_score": 33.03125, "loss": 3.0131068229675293 }, { "id": "identity_68813", "image": "TQA/images/question_images/nuclear_energy_7098.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How does the \"Condenser\" in the diagram contribute to the power generation process?" }, { "from": "gpt", "value": "The answer is Cools steam into water for reuse" } ], "clip_score": 29.796875, "loss": 4.040297985076904 }, { "id": "identity_175850", "image": "IconQA/images/48344.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What geometric shape are these figures?" }, { "from": "gpt", "value": "The answer is Octagon" } ], "clip_score": 33.46875, "loss": 3.7875330448150635 }, { "id": "identity_9421", "image": "DocVQA/images/kgcd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the subject of memorandum ?" }, { "from": "gpt", "value": "The answer is Correction of Holiday dates" } ], "clip_score": 18.953125, "loss": 2.115586042404175 }, { "id": "identity_183684", "image": "PMC-VQA/images/PMC5281579_F5_120924.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the three fluorescent colors visible in the image?" }, { "from": "gpt", "value": "The answer is Red, Green, and Yellow" } ], "clip_score": 25.34375, "loss": 1.7689361572265625 }, { "id": "identity_10873", "image": "GeoQA+/images/1793.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 分别以n边形的顶点为圆心,以单位1为半径画圆,如图所示,则图中阴影部分的面积之和是多少个平方单位?()\nChoices:\n(A) πn²\n(B) 2πn\n(C) \\frac{1}{2}πn²\n(D) π" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 21.53125, "loss": 3.118685483932495 }, { "id": "identity_336288", "image": "TextVQA/images/b481fe94659714a9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is on this?" }, { "from": "gpt", "value": "The answer is abcdefghijklmnopqrstuvwxyz" } ], "clip_score": 17.84375, "loss": 1.331407070159912 }, { "id": "identity_158058", "image": "ScienceQA/images/12844.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country shares the island with the highlighted country?" }, { "from": "gpt", "value": "The answer is Dominican Republic" } ], "clip_score": 33.21875, "loss": 2.2294363975524902 }, { "id": "identity_337332", "image": "VQA2.0/images/543466.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What two types of transportation are visible in this image?" }, { "from": "gpt", "value": "The answer is walking and driving" } ], "clip_score": 21.453125, "loss": 2.289661407470703 }, { "id": "identity_299435", "image": "ChartQA/images/multi_col_21105.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In which year were the most liver transplants performed?" }, { "from": "gpt", "value": "The answer is 2019" } ], "clip_score": 25.078125, "loss": 1.7383118867874146 }, { "id": "identity_156843", "image": "ScienceQA/images/943.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the coin in the image?" }, { "from": "gpt", "value": "The answer is Copper" } ], "clip_score": 30.53125, "loss": 3.4248876571655273 }, { "id": "identity_292752", "image": "GeoQA+/images/5014.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, quadrilateral ABCD is inscribed in circle O, with AD parallel to BC and BD bisecting angle ABC. If angle A is 130°, what is the measure of angle BDC? \nChoices:\n(A) 100°\n(B) 105°\n(C) 110°\n(D) 115°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.875, "loss": 2.263556957244873 }, { "id": "identity_228567", "image": "FigureQA/images/92504.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Between Steel Blue and Orange Red, which color has the highest value at x = 50, and what is the difference between their values at that point?" }, { "from": "gpt", "value": "The answer is Orange Red, 1" } ], "clip_score": 32.15625, "loss": 3.7920422554016113 }, { "id": "identity_40740", "image": "FigureQA/images/40949.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many colors are represented in the pie chart?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 31.5625, "loss": 2.5153467655181885 }, { "id": "identity_136827", "image": "PlotQA/images/65289.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the percentage of female teachers in primary schools in Austria?" }, { "from": "gpt", "value": "The answer is 10%" } ], "clip_score": 34.125, "loss": 1.7319998741149902 }, { "id": "identity_166845", "image": "IconQA/images/90983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many squares are there?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.265625, "loss": 2.635711431503296 }, { "id": "identity_262625", "image": "FigureQA/images/42026.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Gold not the low median?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.15625, "loss": 3.622744083404541 }, { "id": "identity_225270", "image": "PMC-VQA/images/PMC6281586_Fig6_408444.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the distinguishing characteristics of cortical bone formation seen in the CT image, and how do they compare to the characteristics of trabecular, cancellous, and laminar bone formations?" }, { "from": "gpt", "value": "The answer is Dense, compact, forms outer layer, compared to porous or layered" } ], "clip_score": 23.828125, "loss": 3.7072205543518066 }, { "id": "identity_39070", "image": "FigureQA/images/93498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: At what x-axis label does Royal Blue first exceed a value of 75?" }, { "from": "gpt", "value": "The answer is At x-axis label 80" } ], "clip_score": 32.75, "loss": 1.7566895484924316 }, { "id": "identity_55427", "image": "FigureQA/images/28179.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of Aqua to Medium Seafoam in the pie chart?" }, { "from": "gpt", "value": "The answer is 1:3" } ], "clip_score": 37.625, "loss": 1.65435791015625 }, { "id": "identity_185930", "image": "VQA2.0/images/153749.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the woman's jacket?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 11.5234375, "loss": 3.4767470359802246 }, { "id": "identity_122968", "image": "PMC-VQA/images/PMC4805622_Fig7_14393.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the camera in relation to the patient's body?" }, { "from": "gpt", "value": "The answer is Camera is oriented laterally to the hip joint" } ], "clip_score": 26.578125, "loss": 3.741931915283203 }, { "id": "identity_134291", "image": "DVQA/images/bar_train_00071711.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which category has the highest value for 'week'?" }, { "from": "gpt", "value": "The answer is lot" } ], "clip_score": 25.703125, "loss": 3.7489025592803955 }, { "id": "identity_56770", "image": "FigureQA/images/93210.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average value of Sky Blue on the y-axis label?" }, { "from": "gpt", "value": "The answer is 32.5" } ], "clip_score": 32.15625, "loss": 2.140327215194702 }, { "id": "identity_40448", "image": "FigureQA/images/32453.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart represent only two categories?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 33.875, "loss": 3.5438485145568848 }, { "id": "identity_51761", "image": "DocVQA/images/khng0023_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two product UPC codes mentioned in the document?" }, { "from": "gpt", "value": "The answer is 16612, 15212" } ], "clip_score": 22.40625, "loss": 2.5380916595458984 }, { "id": "identity_45536", "image": "MapQA/images/map_5268.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is California's percentage higher than New York's?" }, { "from": "gpt", "value": "The answer is Yes or No" } ], "clip_score": 29.265625, "loss": 3.018603801727295 }, { "id": "identity_19491", "image": "IconQA/images/82010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: The first picture is a turtle. Which picture is fifth?\nChoices:\n(A) turtle\n(B) fish\n(C) sub" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 27.53125, "loss": 3.996638774871826 }, { "id": "identity_272241", "image": "DocVQA/images/rnbx0223_198.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written in the top right corner of the page?" }, { "from": "gpt", "value": "The answer is FOR ALL OUR TOMORROWS" } ], "clip_score": 18.28125, "loss": 2.5195233821868896 }, { "id": "identity_139930", "image": "DocVQA/images/xnnp0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the office phone number of the employer?" }, { "from": "gpt", "value": "The answer is 373-8028" } ], "clip_score": 16.46875, "loss": 2.4099552631378174 }, { "id": "identity_148234", "image": "MapQA/images/map_5388.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the value for Florida greater than 80?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.078125, "loss": 3.531287670135498 }, { "id": "identity_288126", "image": "DVQA/images/bar_train_00096375.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In how many groups of bars is there at least one bar with a value less than 7?" }, { "from": "gpt", "value": "The answer is four" } ], "clip_score": 22.9375, "loss": 3.7857213020324707 }, { "id": "identity_21895", "image": "IconQA/images/29964.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Jen is dancing at a party this evening. The clock shows the time. What time is it?\nChoices:\n(A) 6:00 P.M.\n(B) 6:00 A.M." }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.546875, "loss": 3.5994510650634766 }, { "id": "identity_132094", "image": "Super-CLEVR/images/superCLEVR_new_000947.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of vehicle is positioned between the two metallic jets?" }, { "from": "gpt", "value": "The answer is Bus" } ], "clip_score": 27.90625, "loss": 3.328173875808716 }, { "id": "identity_195465", "image": "PMC-VQA/images/PMC4686739_F7_455080.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many arrows are present in the image?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 20.203125, "loss": 4.259383678436279 }, { "id": "identity_311625", "image": "TabMWP/images/37126.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: The town council reported on the number of stop signs on each street. What is the median of these numbers?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 39.84375, "loss": 2.3922927379608154 }, { "id": "identity_129065", "image": "PMC-VQA/images/PMC5920263_fig01_306710.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify any hardware or implants in the ankle?" }, { "from": "gpt", "value": "The answer is No hardware or implants present" } ], "clip_score": 31.171875, "loss": 2.9432222843170166 }, { "id": "identity_306032", "image": "VQA2.0/images/56048.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Could this be somewhere abroad?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 12.3671875, "loss": 2.9115614891052246 }, { "id": "identity_132801", "image": "TextVQA/images/5803b2404bf9470b.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time of day does the sky suggest it is?" }, { "from": "gpt", "value": "The answer is Daytime" } ], "clip_score": 16.796875, "loss": 2.8793251514434814 }, { "id": "identity_263251", "image": "UniGeo/images/355.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, point C lies on circle O. If ∠ACB = 35°, what is the measure of ∠AOB?\nChoices:\n(A) 35°\n(B) 60°\n(C) 70°\n(D) 90°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 36.40625, "loss": 2.817931890487671 }, { "id": "identity_231692", "image": "DVQA/images/bar_train_00076367.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of people who prefer the 'rise' category across all objects?" }, { "from": "gpt", "value": "The answer is 18" } ], "clip_score": 27.078125, "loss": 2.381389856338501 }, { "id": "identity_77138", "image": "AI2D/images/3999.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the depth range of the Earth's mantle according to the diagram?" }, { "from": "gpt", "value": "The answer is 800 to 1,800 miles (1,300 to 2,900 kilometers)" } ], "clip_score": 36.1875, "loss": 0.872710645198822 }, { "id": "identity_163981", "image": "GeoQA+/images/287.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the perimeter of parallelogram ABCD is 36 and OE = 3, what is the length of segment EF?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 34.9375, "loss": 2.0780885219573975 }, { "id": "identity_17903", "image": "VQA-AS/images/4559.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many cushions are on the couch?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 21.203125, "loss": 2.6191747188568115 }, { "id": "identity_59495", "image": "DVQA/images/bar_train_00065602.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any category where the preference for \"supper\" and \"mail\" is equal?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.703125, "loss": 3.371169328689575 }, { "id": "identity_66673", "image": "AI2D/images/4443.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which leaf has a single undivided blade?" }, { "from": "gpt", "value": "The answer is Simple leaf" } ], "clip_score": 33.8125, "loss": 2.7277276515960693 }, { "id": "identity_337115", "image": "ScienceQA/images/3034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Will these objects attract or repel each other?\nChoices:\n(A) repel\n(B) attract" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 19.734375, "loss": 3.5599448680877686 }, { "id": "identity_290084", "image": "PMC-VQA/images/PMC3932416_F6_268892.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the type of image displayed from the following options:\n(A) SEM image\n(B) CT scan image\n(C) Brightfield image\n(D) X-ray image" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.875, "loss": 5.154263019561768 }, { "id": "identity_198719", "image": "PMC-VQA/images/PMC7410867_Fig2_229887.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of tissue contrast is depicted in the image?" }, { "from": "gpt", "value": "The answer is T2-weighted contrast" } ], "clip_score": 29.5625, "loss": 2.946051836013794 }, { "id": "identity_106841", "image": "A-OKVQA/images/356286.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many motorcycles are visible in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 24.515625, "loss": 2.444504499435425 }, { "id": "identity_202572", "image": "PMC-VQA/images/PMC5907520_fig1_303828.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the image?" }, { "from": "gpt", "value": "The answer is Transverse or axial plane" } ], "clip_score": 21.65625, "loss": 3.8616042137145996 }, { "id": "identity_285577", "image": "ScienceQA/images/5182.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What items can Clara and Gabriel exchange in order to obtain what they desire?\nChoices:\n(A) Gabriel can trade his broccoli for Clara's oranges.\n(B) Clara can trade her tomatoes for Gabriel's carrots.\n(C) Clara can trade her tomatoes for Gabriel's broccoli.\n(D) Gabriel can trade his almonds for Clara's tomatoes." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.25, "loss": 3.1671178340911865 }, { "id": "identity_62823", "image": "ScienceQA/images/10517.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing is represented in the image?" }, { "from": "gpt", "value": "The answer is Track suit" } ], "clip_score": 30.453125, "loss": 3.1776702404022217 }, { "id": "identity_290874", "image": "A-OKVQA/images/445503.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location are the people currently situated?\nChoices:\n(A) restaurant\n(B) garage\n(C) stadium\n(D) office building" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 14.2578125, "loss": 4.44367790222168 }, { "id": "identity_212897", "image": "TextVQA/images/4c581fd6ad545e7d.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many notifications are visible on the phone's screen?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 25.84375, "loss": 2.2229363918304443 }, { "id": "identity_104128", "image": "A-OKVQA/images/404655.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are the participants walking on a designated pedestrian path?" }, { "from": "gpt", "value": "The answer is Yes, they are on a designated path for the event" } ], "clip_score": 27.296875, "loss": 2.865670919418335 }, { "id": "identity_44457", "image": "PlotQA/images/1554.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of trademark applications made by nonresidents over the years shown?" }, { "from": "gpt", "value": "The answer is 4000" } ], "clip_score": 26.96875, "loss": 2.192274808883667 }, { "id": "identity_32832", "image": "GeoQA+/images/4984.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,在△ABC中,直线ED垂直平分线段BC,分别交BC、AB于点D点E,若BD=3,△AEC的周长为20,则△ABC的周长为()\nChoices:\n(A) 23\n(B) 26\n(C) 28\n(D) 30" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.28125, "loss": 2.5230956077575684 }, { "id": "identity_16362", "image": "AI2D/images/2956.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What portion is depicted in yellow in the diagram?\nChoices:\n(A) parietal lobe\n(B) frontal lobe\n(C) occipital lobe\n(D) none of the above" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.078125, "loss": 4.3756608963012695 }, { "id": "identity_199669", "image": "VQA2.0/images/42619.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a bicycle in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 16.0625, "loss": 3.0357630252838135 }, { "id": "identity_117052", "image": "A-OKVQA/images/67982.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the jacket the person is wearing?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 16.9375, "loss": 3.4281718730926514 }, { "id": "identity_236730", "image": "DocVQA/images/xhfg0227_39.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what is the total expenditure for health care services by hospitals in Cuyahoga County in 1955 and 1960 combined?" }, { "from": "gpt", "value": "The answer is $183,904,000" } ], "clip_score": 24.625, "loss": 1.6637213230133057 }, { "id": "identity_40108", "image": "DocVQA/images/jhyc0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who signed the letter?" }, { "from": "gpt", "value": "The answer is Robert E. Shank, M. D" } ], "clip_score": 24.65625, "loss": 2.4392454624176025 }, { "id": "identity_64120", "image": "ScienceQA/images/346.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What material is the first object made of?" }, { "from": "gpt", "value": "The answer is Steel" } ], "clip_score": 27.546875, "loss": 2.6854584217071533 }, { "id": "identity_88062", "image": "GeoQA+/images/5332.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the side length of the square with area 25?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 37.34375, "loss": 1.944503664970398 }, { "id": "identity_59361", "image": "DocVQA/images/hffn0226_17.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many CI PATIENT POSTERS are included in one package?" }, { "from": "gpt", "value": "The answer is 25" } ], "clip_score": 12.859375, "loss": 2.290620803833008 }, { "id": "identity_1753", "image": "AI2D/images/4000.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the region that represents the crust?\nChoices:\n(A) a\n(B) d\n(C) b\n(D) c" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.890625, "loss": 4.169675827026367 }, { "id": "identity_102265", "image": "IconQA/images/101302.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many minutes past 9 o'clock is it?" }, { "from": "gpt", "value": "The answer is 0 minutes" } ], "clip_score": 28.71875, "loss": 1.9925111532211304 }, { "id": "identity_265058", "image": "Super-CLEVR/images/superCLEVR_new_000898.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the material of the green object that shares the same shape as the large gray rubber thing?" }, { "from": "gpt", "value": "The answer is metal" } ], "clip_score": 16.484375, "loss": 4.079949378967285 }, { "id": "identity_41806", "image": "DocVQA/images/jnjm0223_59.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country is associated with the study on \"Amitriptyline comparator in Cancer-Pain--full spectrum of cancer pain\"?" }, { "from": "gpt", "value": "The answer is UK" } ], "clip_score": 14.609375, "loss": 4.079787254333496 }, { "id": "identity_75446", "image": "ScienceQA/images/16560.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which country borders the highlighted country to the south?" }, { "from": "gpt", "value": "The answer is Guatemala" } ], "clip_score": 24.0, "loss": 2.7580955028533936 }, { "id": "identity_135659", "image": "ChartQA/images/two_col_101183.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of deals represented in the year 2019?" }, { "from": "gpt", "value": "The answer is 54" } ], "clip_score": 21.828125, "loss": 2.415802001953125 }, { "id": "identity_39715", "image": "FigureQA/images/59650.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the pie chart have any other colors besides Medium Aqua and Green Yellow?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 34.65625, "loss": 3.2177627086639404 }, { "id": "identity_210488", "image": "VQA2.0/images/216142.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary color of the bus?" }, { "from": "gpt", "value": "The answer is Yellow" } ], "clip_score": 28.75, "loss": 2.938159227371216 }, { "id": "identity_15055", "image": "UniGeo/images/1037.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,AB是⊙O的直径,CD是⊙O的弦,若∠BAD=48°,则∠DCA的大小为()\nChoices:\n(A) 48°\n(B) 42°\n(C) 45°\n(D) 24°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.359375, "loss": 2.781815767288208 }, { "id": "identity_221783", "image": "TextVQA/images/026430da02beed79.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of clothing is the person sitting in seat number 10 wearing?" }, { "from": "gpt", "value": "The answer is A white wedding dress" } ], "clip_score": 20.71875, "loss": 3.244847297668457 }, { "id": "identity_253125", "image": "TabMWP/images/1520.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of miles run by the track team members last week, and how does this compare to the median number of miles run?" }, { "from": "gpt", "value": "The answer is Average is 6.375, median is 6.5" } ], "clip_score": 23.625, "loss": 2.0783677101135254 }, { "id": "identity_31243", "image": "ScienceQA/images/6134.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Identify the question that Sidney's experiment can best answer.\nChoices:\n(A) Do more tomato seedlings grow when they are planted in soil with fertilizer compared to soil without fertilizer?\n(B) Does the humidity level where tomato seeds are planted affect the number of tomato seedlings that grow?" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 26.109375, "loss": 3.646606922149658 }, { "id": "identity_117201", "image": "VQA2.0/images/542974.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the horse in the image?" }, { "from": "gpt", "value": "The answer is Brown" } ], "clip_score": 20.359375, "loss": 3.186011791229248 }, { "id": "identity_161666", "image": "UniGeo/images/1450.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the area of the equilateral triangle ABC with side length 4?" }, { "from": "gpt", "value": "The answer is 4*sqrt(3)" } ], "clip_score": 32.125, "loss": 1.4395111799240112 }, { "id": "identity_177933", "image": "IconQA/images/31313.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If you remove one orange marble, what fraction of the remaining marbles will be black?" }, { "from": "gpt", "value": "The answer is 1/4" } ], "clip_score": 34.21875, "loss": 1.7449097633361816 }, { "id": "identity_68875", "image": "ScienceQA/images/13866.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average final temperature of both greenhouses?" }, { "from": "gpt", "value": "The answer is 21.5°C" } ], "clip_score": 28.171875, "loss": 1.4024021625518799 }, { "id": "identity_271932", "image": "GeoQA+/images/495.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the park, a stone arch bridge is shown in the shape of a circular arc (minor arc) with a span of 12m and a radius of 10m. What is the height of the arch?\nChoices:\n(A) 3m\n(B) 2m\n(C) 4m\n(D) √{3}m" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.0, "loss": 2.8820834159851074 }, { "id": "identity_306665", "image": "DocVQA/images/slxd0227_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned here?" }, { "from": "gpt", "value": "The answer is March 4, 1976" } ], "clip_score": 21.4375, "loss": 1.9523444175720215 }, { "id": "identity_292842", "image": "DocVQA/images/jqkm0227_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount for Year 2?" }, { "from": "gpt", "value": "The answer is $18,690" } ], "clip_score": 20.828125, "loss": 2.4177870750427246 }, { "id": "identity_21200", "image": "IconQA/images/106721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What fraction of the shape is purple?\nChoices:\n(A) 1/5\n(B) 1/4\n(C) 1/3\n(D) 1/2" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 35.46875, "loss": 3.626852512359619 }, { "id": "identity_212480", "image": "VQA2.0/images/4239.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any other people visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 15.546875, "loss": 2.9647605419158936 }, { "id": "identity_25123", "image": "PMC-VQA/images/PMC4331293_Fig5_359188.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What specific part of the brain is affected by MCA vasospasm? \nChoices:\n(A) Right hemisphere \n(B) Cerebellum \n(C) Left hemisphere \n(D) Brainstem " }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 23.3125, "loss": 3.915950059890747 }, { "id": "identity_166713", "image": "IconQA/images/21059.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of dots on the dice shown in the image?" }, { "from": "gpt", "value": "The answer is 17" } ], "clip_score": 29.125, "loss": 2.114128589630127 }, { "id": "identity_61873", "image": "DocVQA/images/nzpg0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the subject of this memorandum?" }, { "from": "gpt", "value": "The answer is Excerpts from HDFP Policy Advisory Board Minutes of June 4, 1976, Meeting" } ], "clip_score": 31.15625, "loss": 1.932397484779358 }, { "id": "identity_88946", "image": "Geometry3K/images/446.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of side DF?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 30.296875, "loss": 3.0802390575408936 }, { "id": "identity_95045", "image": "TabMWP/images/8669.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Mario's Pizzeria sold twice as many slices, how many slices would that be?" }, { "from": "gpt", "value": "The answer is 5640" } ], "clip_score": 38.65625, "loss": 2.6601359844207764 }, { "id": "identity_80853", "image": "GeoQA+/images/334.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given triangle ABC, with AB = AC, and points D and E on BC and AC respectively, if AD = AE and ∠BAD = ∠CAD = 20°, what is the measure of ∠AED?" }, { "from": "gpt", "value": "The answer is 70°" } ], "clip_score": 34.0625, "loss": 1.6607953310012817 }, { "id": "identity_330105", "image": "DVQA/images/bar_train_00127790.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are these bars horizontal?" }, { "from": "gpt", "value": "The answer is yes" } ], "clip_score": 19.765625, "loss": 4.246586322784424 }, { "id": "identity_149679", "image": "DocVQA/images/kmcj0227_5.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the location of Hector Aguilar's present occupation?" }, { "from": "gpt", "value": "The answer is Guatemala City, Guatemala, C.A" } ], "clip_score": 20.625, "loss": 2.172523260116577 }, { "id": "identity_127513", "image": "A-OKVQA/images/568555.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the weather like in the image?" }, { "from": "gpt", "value": "The answer is Overcast and possibly rainy" } ], "clip_score": 18.0625, "loss": 2.982736587524414 }, { "id": "identity_215476", "image": "PMC-VQA/images/PMC4696287_Fig4_458028.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the image show any signs of pathology in the maxillary sinus?" }, { "from": "gpt", "value": "The answer is No signs of pathology in the maxillary sinus" } ], "clip_score": 29.78125, "loss": 1.7832832336425781 }, { "id": "identity_65296", "image": "TQA/images/question_images/parts_microscope_7202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which part of the microscope is used to hold the slide firmly in place?" }, { "from": "gpt", "value": "The answer is Stage clip" } ], "clip_score": 33.0, "loss": 2.9071662425994873 }, { "id": "identity_219875", "image": "A-OKVQA/images/391456.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the bus in the foreground of the image?" }, { "from": "gpt", "value": "The answer is Green" } ], "clip_score": 21.203125, "loss": 3.6340537071228027 }, { "id": "identity_127769", "image": "TextVQA/images/eb34a38e10f395b4.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many yellow taxis can be seen in the image?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 25.328125, "loss": 2.8146069049835205 }, { "id": "identity_253564", "image": "VQA2.0/images/244293.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles are visible in the image, and what is the color of the horse the cop is riding on?" }, { "from": "gpt", "value": "The answer is 8 vehicles, brown horse" } ], "clip_score": 31.21875, "loss": 2.8284401893615723 }, { "id": "identity_1172", "image": "ChartQA/images/multi_col_60046.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the smallest value in the 18-34 category?" }, { "from": "gpt", "value": "The answer is 57.1" } ], "clip_score": 15.7421875, "loss": 2.745048761367798 }, { "id": "identity_28659", "image": "IconQA/images/71539.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vertices does this shape have?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 19.515625, "loss": 2.0900182723999023 }, { "id": "identity_329231", "image": "Geometry3K/images/953.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Determine the length of JN if \\triangle JMN is an isosceles triangle with JM \\cong MN.\nChoices:\n(A) 2\n(B) 3\n(C) 4\n(D) 5" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.5625, "loss": 2.6516001224517822 }, { "id": "identity_308390", "image": "ScienceQA/images/15265.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Santa Fe\n(B) Boulder\n(C) Colorado Springs\n(D) Denver" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 27.515625, "loss": 4.30958890914917 }, { "id": "identity_317018", "image": "DVQA/images/bar_train_00089003.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which group of bars has the smallest single value in the entire chart?" }, { "from": "gpt", "value": "The answer is disk" } ], "clip_score": 29.8125, "loss": 3.228276491165161 }, { "id": "identity_282598", "image": "TabMWP/images/12091.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Does Stanley have sufficient funds to purchase both a basketball and a volleyball with his $34.00?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.40625, "loss": 4.038125038146973 }, { "id": "identity_133445", "image": "VizWiz/images/VizWiz_train_00006328.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main color theme of the CD cover?" }, { "from": "gpt", "value": "The answer is Black and white" } ], "clip_score": 28.40625, "loss": 2.9559619426727295 }, { "id": "identity_48660", "image": "DVQA/images/bar_train_00198031.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the category \"flight\" have a positive value for the blue bar?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 19.609375, "loss": 3.2412490844726562 }, { "id": "identity_313830", "image": "ScienceQA/images/6777.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the capital of this state?\nChoices:\n(A) Biloxi\n(B) Frankfort\n(C) Jackson\n(D) Raleigh" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.03125, "loss": 4.098267555236816 }, { "id": "identity_14545", "image": "DVQA/images/bar_train_00108699.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more accurate is the most accurate algorithm compared the least accurate algorithm?" }, { "from": "gpt", "value": "The answer is 80" } ], "clip_score": 33.5, "loss": 2.698795795440674 }, { "id": "identity_278794", "image": "PMC-VQA/images/PMC4469073_diagnostics-05-00189-f005_397227.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In which location is the abnormal uptake seen in the image? \nChoices:\n(A) Left anterior ilium.\n(B) Right posterior ilium.\n(C) Left posterior ilium.\n(D) Right anterior ilium." }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 20.796875, "loss": 4.119012832641602 }, { "id": "identity_18762", "image": "UniGeo/images/2875.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,△ABC顶点A、B、C都在⊙O上,连接AO、BO,若∠OAB=50°,则∠ACB的大小是()\nChoices:\n(A) 25°\n(B) 40°\n(C) 50°\n(D) 80°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 28.78125, "loss": 2.5455665588378906 }, { "id": "identity_292445", "image": "UniGeo/images/887.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Placing the protractor on the triangular cardboard as shown in the figure, with point C on the semicircle. If the readings of points A and B are 88° and 30° respectively, what is the measure of ∠ACB?\nChoices:\n(A) 15°\n(B) 28°\n(C) 29°\n(D) 34°" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 31.875, "loss": 2.5778801441192627 }, { "id": "identity_305730", "image": "PMC-VQA/images/PMC4956629_Fig12_54032.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the likely diagnosis based on this image?\nChoices:\n(A) Appendicitis\n(B) Peritonitis\n(C) Pancreatitis\n(D) Cholecystitis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.328125, "loss": 4.484025955200195 }, { "id": "identity_160764", "image": "GeoQA+/images/4447.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If ABCD is a rhombus and the length of diagonal BD is 16 units, what is the area of the rhombus?" }, { "from": "gpt", "value": "The answer is (1/2)*16*√512" } ], "clip_score": 36.25, "loss": 2.1184403896331787 }, { "id": "identity_238130", "image": "UniGeo/images/3148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,在菱形ABCD中,∠A=110°,E,F分别是边AB和BC的中点,EP⊥CD于点P,求∠FPC的同时,求证△EFP为等腰三角形" }, { "from": "gpt", "value": "The answer is 35°" } ], "clip_score": 29.515625, "loss": 2.534219741821289 }, { "id": "identity_47704", "image": "DVQA/images/bar_train_00028529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the difference in units sold between the least and most popular item in the 'noon' store?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 32.3125, "loss": 2.793307304382324 }, { "id": "identity_182457", "image": "A-OKVQA/images/61854.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What beverage brand is the bottle on the table?" }, { "from": "gpt", "value": "The answer is Perrier" } ], "clip_score": 23.0, "loss": 3.056354284286499 }, { "id": "identity_318453", "image": "GeoQA+/images/1907.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,P是圆O外的一点,PA、PB与圆O分别相切于点A、B,C是劣弧AB上任一点,过点C的切线分别交PA、PB于点D、E。若∠P=38°,则∠DOE是多少?\n选项:\n(A) 38°\n(B) 52°\n(C) 70°\n(D) 71°" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 25.9375, "loss": 2.4172425270080566 }, { "id": "identity_194738", "image": "PMC-VQA/images/PMC3744490_pone-0072265-g004_224985.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the object located in the center of the image?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 20.046875, "loss": 4.008429527282715 }, { "id": "identity_138003", "image": "DVQA/images/bar_train_00157108.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any algorithms with a minimum accuracy below 2?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 29.375, "loss": 3.1096529960632324 }, { "id": "identity_211240", "image": "PMC-VQA/images/PMC8084928_Fig7_430849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of imaging technique is used to produce the images shown?" }, { "from": "gpt", "value": "The answer is Magnetic Resonance Imaging (MRI)" } ], "clip_score": 28.21875, "loss": 0.8733079433441162 }, { "id": "identity_161453", "image": "Geometry3K/images/1174.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the length of the side opposite to the 52° angle in the right triangle shown in the figure?" }, { "from": "gpt", "value": "The answer is 14.3" } ], "clip_score": 36.59375, "loss": 2.3586533069610596 }, { "id": "identity_92980", "image": "TabMWP/images/10882.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of photographs taken per day over the past 5 days?" }, { "from": "gpt", "value": "The answer is 4" } ], "clip_score": 34.4375, "loss": 2.622627019882202 }, { "id": "identity_285080", "image": "A-OKVQA/images/386712.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which form of art was utilized to preserve these deceased animals? \n(A) taxidermy \n(B) metalworking \n(C) sculpting \n(D) drawing" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 23.75, "loss": 4.384176731109619 }, { "id": "identity_162120", "image": "GeoQA+/images/3148.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB and CD are perpendicular chords intersecting at point P in circle O, and angle ABC is 70 degrees, what is the measure of angle AED?" }, { "from": "gpt", "value": "The answer is 160 degrees" } ], "clip_score": 38.375, "loss": 1.851094365119934 }, { "id": "identity_302849", "image": "IconQA/images/44385.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Move the ruler to measure the length of this to the nearest centimeter. This is about (_) centimeters long." }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 34.84375, "loss": 2.9598586559295654 }, { "id": "identity_285778", "image": "PMC-VQA/images/PMC7597045_Fig1_285913.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is depicted in the image? Is it:\n(A) Left kidney\n(B) Right kidney\n(C) Liver\n(D) Spleen" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.15625, "loss": 4.165987968444824 }, { "id": "identity_309186", "image": "IconQA/images/106677.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is the number of squirrels greater than the number of acorns?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 33.6875, "loss": 3.6191182136535645 }, { "id": "identity_99830", "image": "IconQA/images/107163.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average number of squares per bar?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 21.828125, "loss": 2.116877317428589 }, { "id": "identity_202885", "image": "VizWiz/images/VizWiz_train_00001254.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What brand is the laptop?" }, { "from": "gpt", "value": "The answer is MSI" } ], "clip_score": 29.046875, "loss": 4.142120838165283 }, { "id": "identity_260170", "image": "VQA2.0/images/363191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many beers are there in each of the two sections of the fridge, and what is the difference in the number of beers between the two sections?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 28.21875, "loss": 4.021965503692627 }, { "id": "identity_250077", "image": "DocVQA/images/kzng0227_24.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total amount of potassium in mg for 300g of gizzard, turkey, and 200g of goose, breast meat?" }, { "from": "gpt", "value": "The answer is 1350 mg" } ], "clip_score": 22.140625, "loss": 1.5363001823425293 }, { "id": "identity_119848", "image": "Super-CLEVR/images/superCLEVR_new_001266.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects in the image have wheels?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 22.71875, "loss": 2.309368371963501 }, { "id": "identity_179248", "image": "CLEVR-Math/images/CLEVR_train_046988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are spheres?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.5, "loss": 2.515866994857788 }, { "id": "identity_287796", "image": "GeoQA+/images/1529.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram, BD is the diameter of circle O, points A and C lie on circle O, AB=AD, and AC intersects BD at point G. If ∠COD=120°, what is the degree measure of ∠AGB?\nChoices:\n(A) 96°\n(B) 105°\n(C) 107°\n(D) 114°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 36.65625, "loss": 2.328784227371216 }, { "id": "identity_276982", "image": "IconQA/images/2187.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the total number of triangles present?\nChoices:\n(A) 5\n(B) 8\n(C) 10\n(D) 3\n(E) 9" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.21875, "loss": 3.3117458820343018 }, { "id": "identity_133735", "image": "Super-CLEVR/images/superCLEVR_new_001202.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the largest vehicle in the image?" }, { "from": "gpt", "value": "The answer is Purple" } ], "clip_score": 25.21875, "loss": 2.747756242752075 }, { "id": "identity_4379", "image": "TQA/images/question_images/hair_follicles_6994.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: How many hairs can you see?\nChoices:\n(A) 6\n(B) 8\n(C) 10\n(D) 2" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 21.234375, "loss": 3.020127296447754 }, { "id": "identity_128352", "image": "PMC-VQA/images/PMC7880116_f0003_366918.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does this image show any signs of brain atrophy?" }, { "from": "gpt", "value": "The answer is Yes, there are signs of brain atrophy" } ], "clip_score": 31.5, "loss": 1.8468531370162964 }, { "id": "identity_197370", "image": "VQA2.0/images/60415.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the child's jacket?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 20.625, "loss": 3.551896572113037 }, { "id": "identity_162427", "image": "GeoQA+/images/386.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In parallelogram ABCD, if AB = 6 and BC = 8, what is the area of triangle ACD?" }, { "from": "gpt", "value": "The answer is 24" } ], "clip_score": 34.875, "loss": 1.8176496028900146 }, { "id": "identity_174306", "image": "TabMWP/images/11513.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How much more money does Michelle need to buy a large cardboard box?" }, { "from": "gpt", "value": "The answer is $0.01" } ], "clip_score": 24.234375, "loss": 2.16898250579834 }, { "id": "identity_154399", "image": "ScienceQA/images/4462.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Vermont?" }, { "from": "gpt", "value": "The answer is Massachusetts" } ], "clip_score": 31.765625, "loss": 3.15950345993042 }, { "id": "identity_334886", "image": "TextVQA/images/0db3238edca36d72.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What's the number on this?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 12.109375, "loss": 2.365112066268921 }, { "id": "identity_267422", "image": "DocVQA/images/tgym0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: On what date was it dispatched?" }, { "from": "gpt", "value": "The answer is 5/16/80" } ], "clip_score": 14.484375, "loss": 2.237119436264038 }, { "id": "identity_278721", "image": "UniGeo/images/1407.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the figure, the radii of two concentric circles are 3cm and 5cm respectively. Chord AB is tangent to the smaller circle at point C. What is the length of AB?\n(A) 4cm\n(B) 5cm\n(C) 6cm\n(D) 8cm" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 39.71875, "loss": 2.3190181255340576 }, { "id": "identity_140686", "image": "DVQA/images/bar_train_00153215.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which item has the highest total sales across all stores?" }, { "from": "gpt", "value": "The answer is top" } ], "clip_score": 29.71875, "loss": 3.222651958465576 }, { "id": "identity_187058", "image": "PMC-VQA/images/PMC6447645_Fig5_460853.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the joint that is primarily shown in the image?" }, { "from": "gpt", "value": "The answer is Knee joint" } ], "clip_score": 30.234375, "loss": 2.4342944622039795 }, { "id": "identity_152725", "image": "ScienceQA/images/10778.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the predominant color of the bird's feathers?" }, { "from": "gpt", "value": "The answer is Blue" } ], "clip_score": 29.25, "loss": 3.5756068229675293 }, { "id": "identity_102577", "image": "TabMWP/images/12512.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If Jen also bought 1/2 pound of colored paper clips, how much did she spend in total?" }, { "from": "gpt", "value": "The answer is $6.60" } ], "clip_score": 31.015625, "loss": 2.0575931072235107 }, { "id": "identity_35248", "image": "VQA2.0/images/304849.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many archways are visible?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 21.890625, "loss": 2.7658236026763916 }, { "id": "identity_262264", "image": "GeoQA+/images/3416.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the diagram, quadrilateral ADCB is inscribed in circle O, with angle AOB measuring 122°. What is the measure of angle ACB?\nChoices:\n(A) 131°\n(B) 119°\n(C) 122°\n(D) 58°" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 35.71875, "loss": 2.7311439514160156 }, { "id": "identity_37449", "image": "VQA2.0/images/554036.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is behind the catcher?" }, { "from": "gpt", "value": "The answer is umpire" } ], "clip_score": 25.828125, "loss": 2.8425097465515137 }, { "id": "identity_206445", "image": "PMC-VQA/images/PMC7921273_Fig1_380477.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the lesion in the EOB-MRI image show signs of necrosis?" }, { "from": "gpt", "value": "The answer is No signs of necrosis are visible within the lesion" } ], "clip_score": 29.8125, "loss": 2.2403781414031982 }, { "id": "identity_72445", "image": "AI2D/images/3655.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the function of the structure labeled as \"Esophagus\"?" }, { "from": "gpt", "value": "The answer is Transports food to the stomach" } ], "clip_score": 29.515625, "loss": 1.7832201719284058 }, { "id": "identity_228220", "image": "A-OKVQA/images/181367.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two objects closest to the woman on the left, and what is their relative position to each other?" }, { "from": "gpt", "value": "The answer is Bicycle and recumbent tricycle, with the tricycle to the right" } ], "clip_score": 24.015625, "loss": 2.2820563316345215 }, { "id": "identity_291678", "image": "ScienceQA/images/1568.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which question is most suitable for Simon's experiment to address:\n(A) Does the temperature impact the growth of bacteria in liquid?\n(B) Is there a difference in bacterial growth between liquid with cinnamon and liquid without cinnamon?" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.75, "loss": 4.042945861816406 }, { "id": "identity_201888", "image": "PMC-VQA/images/PMC9417040_pone.0272639.g013_386901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any visible pathology in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.4375, "loss": 2.784386396408081 }, { "id": "identity_120405", "image": "VizWiz/images/VizWiz_train_00013901.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the background of the image a natural or man-made environment?" }, { "from": "gpt", "value": "The answer is The background is a (natural/man-made) environment" } ], "clip_score": 17.46875, "loss": 2.7602429389953613 }, { "id": "identity_133331", "image": "PMC-VQA/images/PMC6678645_ijms-20-03382-f002_18310.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you identify the type of MRI sequence used in this image?" }, { "from": "gpt", "value": "The answer is T1-weighted MRI sequence" } ], "clip_score": 33.4375, "loss": 1.492324948310852 }, { "id": "identity_189434", "image": "VQA2.0/images/176483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many people are visible in the image?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 18.328125, "loss": 2.2409324645996094 }, { "id": "identity_23668", "image": "TextVQA/images/009847496908a1ad.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: what's the number on the white shirt?" }, { "from": "gpt", "value": "The answer is 15" } ], "clip_score": 11.6796875, "loss": 2.191964864730835 }, { "id": "identity_12525", "image": "AI2D/images/632.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What channel connects the magma chamber to the exit?\nChoices:\n(A) main vent/conduit\n(B) none of the above\n(C) crater\n(D) ash cloud" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.3125, "loss": 3.950299024581909 }, { "id": "identity_202824", "image": "A-OKVQA/images/562916.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the man's hat?" }, { "from": "gpt", "value": "The answer is Black" } ], "clip_score": 15.4296875, "loss": 3.5723602771759033 }, { "id": "identity_85134", "image": "GeoQA+/images/4217.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the measure of angle BCD in the quadrilateral ABCD?" }, { "from": "gpt", "value": "The answer is 90°" } ], "clip_score": 31.6875, "loss": 1.8236804008483887 }, { "id": "identity_295971", "image": "PMC-VQA/images/PMC7220413_F1_168808.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What do the images depict?\nOptions:\n(A) Dynamic changes of COVID-19\n(B) Dynamic changes of Acute Pancreatitis\n(C) Dynamic changes of Pneumonia and Acute Perforated Appendicitis\n(D) Dynamic changes of Breast Cancer" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 24.5, "loss": 3.656750440597534 }, { "id": "identity_25092", "image": "TabMWP/images/14806.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Maureen went to the store. She bought 0.7 pounds of alphabet confetti. How much did she spend?" }, { "from": "gpt", "value": "The answer is 7.70" } ], "clip_score": 25.640625, "loss": 2.2709577083587646 }, { "id": "identity_267387", "image": "TQA/images/question_images/cycle_nitrogen_6721.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which step is depicted in the diagram provided?\nOptions:\n(A) 4\n(B) 3\n(C) 1\n(D) 2" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 11.3359375, "loss": 3.4252896308898926 }, { "id": "identity_201684", "image": "TextVQA/images/322f6f4d72f39ae9.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What word is written below \"VILLA CAPRI\"?" }, { "from": "gpt", "value": "The answer is MOTEL" } ], "clip_score": 41.375, "loss": 3.070784330368042 }, { "id": "identity_173498", "image": "IconQA/images/6339.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many lines of symmetry does this shape have?" }, { "from": "gpt", "value": "The answer is Infinite" } ], "clip_score": 29.0625, "loss": 3.387957811355591 }, { "id": "identity_222202", "image": "VQA2.0/images/389137.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any traffic lights in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 22.265625, "loss": 3.301435947418213 }, { "id": "identity_254528", "image": "TabMWP/images/18258.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the probability that a randomly selected contestant charmed 5-20 worms and tried tapping the ground?" }, { "from": "gpt", "value": "The answer is 1/2" } ], "clip_score": 32.9375, "loss": 1.8500947952270508 }, { "id": "identity_7006", "image": "DVQA/images/bar_train_00015454.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is each bar a single solid color without patterns?" }, { "from": "gpt", "value": "The answer is no" } ], "clip_score": 15.7578125, "loss": 3.4660887718200684 }, { "id": "identity_139429", "image": "MapQA/images/map_6983.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state has the lowest number of dates according to the map?" }, { "from": "gpt", "value": "The answer is A state in the range of 3235737-3534667" } ], "clip_score": 32.5, "loss": 2.6868069171905518 }, { "id": "identity_153423", "image": "ScienceQA/images/6383.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which direction is New Zealand from Australia in this image?" }, { "from": "gpt", "value": "The answer is East" } ], "clip_score": 31.640625, "loss": 3.1211915016174316 }, { "id": "identity_310712", "image": "IconQA/images/71491.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the perimeter of this?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 31.109375, "loss": 1.8422433137893677 }, { "id": "identity_165078", "image": "UniGeo/images/2280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the ratio of the area of triangle ADE to the area of triangle ABC?" }, { "from": "gpt", "value": "The answer is 1/9" } ], "clip_score": 31.328125, "loss": 2.0376334190368652 }, { "id": "identity_284137", "image": "TextVQA/images/3f4e503184a49931.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which taxi number is on the left?" }, { "from": "gpt", "value": "The answer is 4k72" } ], "clip_score": 27.359375, "loss": 2.180952548980713 }, { "id": "identity_197680", "image": "PMC-VQA/images/PMC8046147_jbm410472-fig-0005_417800.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the orientation of the foot in the image?" }, { "from": "gpt", "value": "The answer is Dorsoplantar view" } ], "clip_score": 30.921875, "loss": 3.2763068675994873 }, { "id": "identity_23286", "image": "ScienceQA/images/6166.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which solution has a higher concentration of purple particles?\nChoices:\n(A) neither; their concentrations are the same\n(B) Solution B\n(C) Solution A" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 31.46875, "loss": 3.1501317024230957 }, { "id": "identity_247002", "image": "TabMWP/images/34280.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of members in all the committees combined?" }, { "from": "gpt", "value": "The answer is 353" } ], "clip_score": 21.953125, "loss": 2.5710203647613525 }, { "id": "identity_313005", "image": "FigureQA/images/29401.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is this less than Orange?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 18.578125, "loss": 3.8628997802734375 }, { "id": "identity_66547", "image": "ScienceQA/images/2190.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the rubber part of the toilet plunger?" }, { "from": "gpt", "value": "The answer is Circular" } ], "clip_score": 31.828125, "loss": 3.4294424057006836 }, { "id": "identity_204136", "image": "PMC-VQA/images/PMC6003163_Fig1_329548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the image a cross-sectional view of the brain at the level of the ventricles?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 34.5, "loss": 2.99418306350708 }, { "id": "identity_299873", "image": "DVQA/images/bar_train_00025478.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the number of units sold for the least popular item?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 28.046875, "loss": 2.721104621887207 }, { "id": "identity_94457", "image": "TabMWP/images/19939.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who collected the least number of newspapers?" }, { "from": "gpt", "value": "The answer is Winston" } ], "clip_score": 32.125, "loss": 1.9958057403564453 }, { "id": "identity_4020", "image": "GeoQA+/images/379.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: 如图,P是⊙O外一点,射线PA、PB分别切⊙O于点A、点B,CD切⊙O于点E,分别交PA、PB于点D、点C,若PB=4,则△PCD的周长()\nChoices:\n(A) 4\n(B) 6\n(C) 8\n(D) 10" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 25.484375, "loss": 2.465031385421753 }, { "id": "identity_232344", "image": "TextVQA/images/00c9f3f8e860c48f.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the bus route number and its destination?" }, { "from": "gpt", "value": "The answer is Route 12 to Carisa-Hospital" } ], "clip_score": 28.109375, "loss": 3.1576309204101562 }, { "id": "identity_185224", "image": "PMC-VQA/images/PMC6214924_Fig4_390574.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any text or symbol indicating the orientation of the scan?" }, { "from": "gpt", "value": "The answer is No specific orientation markers visible" } ], "clip_score": 18.796875, "loss": 4.780866622924805 }, { "id": "identity_161164", "image": "GeoQA+/images/4490.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If AB is parallel to CD, what is the measure of angle ECD?" }, { "from": "gpt", "value": "The answer is 62 degrees" } ], "clip_score": 32.21875, "loss": 2.6446633338928223 }, { "id": "identity_255177", "image": "CLEVR-Math/images/CLEVR_train_047038.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Subtract all large metal cylinders and all spheres. How many non-spherical objects are left?" }, { "from": "gpt", "value": "The answer is 3" } ], "clip_score": 28.859375, "loss": 2.292156219482422 }, { "id": "identity_235485", "image": "Super-CLEVR/images/superCLEVR_new_001053.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of vehicles (articulated buses, bicycles, and other vehicles) in the image, and is this number greater than the sum of the number of articulated buses and bicycles?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 23.578125, "loss": 2.911478042602539 }, { "id": "identity_245495", "image": "Geometry3K/images/1622.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Find the perimeter of the figure" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 33.46875, "loss": 2.5936081409454346 }, { "id": "identity_142215", "image": "PlotQA/images/54439.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average population percentage for all income levels in 1961?" }, { "from": "gpt", "value": "The answer is 17.5%" } ], "clip_score": 28.71875, "loss": 1.577178955078125 }, { "id": "identity_219180", "image": "PMC-VQA/images/PMC9369558_jcm-11-04556-f001_369917.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of a measurement standard deviation in the image?" }, { "from": "gpt", "value": "The answer is Yes, there is an indication of standard deviation" } ], "clip_score": 14.8125, "loss": 2.160834789276123 }, { "id": "identity_246480", "image": "UniGeo/images/508.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图, 是一个圆锥形纸杯的侧面展开图, 已知圆锥底面半径为5cm, 母线长为15cm, 那么纸杯的总表面积为多少平方厘米?" }, { "from": "gpt", "value": "The answer is 100π cm²" } ], "clip_score": 24.96875, "loss": 1.748220443725586 }, { "id": "identity_261360", "image": "DocVQA/images/zxbw0217_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the deadline for submitting the project 'Paper PC(3)'?" }, { "from": "gpt", "value": "The answer is April 2003" } ], "clip_score": 24.84375, "loss": 2.800771474838257 }, { "id": "identity_125530", "image": "VizWiz/images/VizWiz_train_00002590.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the coffee cup lid?" }, { "from": "gpt", "value": "The answer is White" } ], "clip_score": 31.28125, "loss": 3.2506165504455566 }, { "id": "identity_71070", "image": "ScienceQA/images/20598.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify the molecule that is not a compound" }, { "from": "gpt", "value": "The answer is oxygen" } ], "clip_score": 30.78125, "loss": 2.2646915912628174 }, { "id": "identity_232950", "image": "A-OKVQA/images/417983.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many elephants are in the field she is looking at, and what is the predominant color of the vegetation in the field?" }, { "from": "gpt", "value": "The answer is two elephants, green vegetation" } ], "clip_score": 26.890625, "loss": 2.271519422531128 }, { "id": "identity_66713", "image": "AI2D/images/137.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the force envelope produced by the Sun as shown in the diagram?" }, { "from": "gpt", "value": "The answer is Smaller tidal force envelope" } ], "clip_score": 26.953125, "loss": 2.447611093521118 }, { "id": "identity_6151", "image": "VQA2.0/images/481920.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What words are written on the back of the chair?" }, { "from": "gpt", "value": "The answer is us open" } ], "clip_score": 26.6875, "loss": 2.9398653507232666 }, { "id": "identity_121990", "image": "VQA-AS/images/14291.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the main subject of the painting on the wall?" }, { "from": "gpt", "value": "The answer is A portrait of a person" } ], "clip_score": 24.1875, "loss": 3.0620687007904053 }, { "id": "identity_249476", "image": "TQA/images/question_images/earth_eclipses_695.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the conditions under which each type of eclipse (total, partial, and annular) occurs, and explain the differences in the alignment of the Sun, Moon, and Earth for each type" }, { "from": "gpt", "value": "The answer is Total, Partial, and Annular eclipses occur under specific alignments" } ], "clip_score": 17.59375, "loss": 2.1078267097473145 }, { "id": "identity_101207", "image": "IconQA/images/31220.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many ovals are there in the image?" }, { "from": "gpt", "value": "The answer is 20" } ], "clip_score": 30.09375, "loss": 2.0016250610351562 }, { "id": "identity_167401", "image": "CLEVR-Math/images/CLEVR_train_002194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many objects are yellow?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 25.8125, "loss": 2.3766086101531982 }, { "id": "identity_61488", "image": "DocVQA/images/fjbw0217_13.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the dosage of Medroxyprogesterone Acetate for treatment group C?" }, { "from": "gpt", "value": "The answer is 5.0 mg" } ], "clip_score": 30.171875, "loss": 2.1297056674957275 }, { "id": "identity_278857", "image": "CLEVR-Math/images/CLEVR_train_028821.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: After removing all the green blocks, what is the total number of blocks remaining?" }, { "from": "gpt", "value": "The answer is 2" } ], "clip_score": 23.28125, "loss": 2.3784244060516357 }, { "id": "identity_286085", "image": "PMC-VQA/images/PMC5985138_fig1_323495.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What is the indication shown on the radiograph?\nChoices:\n(A) Large bowel obstruction \n(B) Small bowel obstruction \n(C) Liver cirrhosis \n(D) Pancreatitis" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.515625, "loss": 4.153084754943848 }, { "id": "identity_59444", "image": "DocVQA/images/kybd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the two organizations co-sponsoring the Symposium on Food Standards?" }, { "from": "gpt", "value": "The answer is The Council on Foods and Nutrition of the American Medical Association and the AMA-Food Industry Liaison Committee" } ], "clip_score": 31.28125, "loss": 1.0780667066574097 }, { "id": "identity_317894", "image": "IconQA/images/7194.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What time is indicated on the clock?" }, { "from": "gpt", "value": "The answer is 4:00" } ], "clip_score": 30.296875, "loss": 2.069171667098999 }, { "id": "identity_62382", "image": "DocVQA/images/qngh0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the unique SKU number for the material \"Plug Wrap\" under Brand 1?" }, { "from": "gpt", "value": "The answer is 0057823" } ], "clip_score": 20.609375, "loss": 2.5668931007385254 }, { "id": "identity_275296", "image": "FigureQA/images/48728.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which color is greater, Orange Red or Purple?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 29.03125, "loss": 3.810014247894287 }, { "id": "identity_146375", "image": "DocVQA/images/phnm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What company is mentioned in the body of the memorandum?" }, { "from": "gpt", "value": "The answer is Doubleday & Company, Inc" } ], "clip_score": 20.96875, "loss": 3.1877593994140625 }, { "id": "identity_25990", "image": "TabMWP/images/23400.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Dr. Chapman, a zoo director, researched ticket prices at other zoos around the country. Which charges more for a child ticket, the Minnesota Zoo or the Brookfield Zoo?\nChoices:\n(A) Brookfield\n(B) Minnesota" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 40.15625, "loss": 3.7949891090393066 }, { "id": "identity_290057", "image": "VQA2.0/images/569591.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can you tell me what is written on the post?" }, { "from": "gpt", "value": "The answer is handicap" } ], "clip_score": 12.453125, "loss": 2.8964271545410156 }, { "id": "identity_192200", "image": "PMC-VQA/images/PMC5086477_Fig2_89351.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What anatomical structure is located at the area of abnormal uptake indicated by the arrow?" }, { "from": "gpt", "value": "The answer is The bladder" } ], "clip_score": 25.359375, "loss": 3.3136818408966064 }, { "id": "identity_268760", "image": "PMC-VQA/images/PMC2845548_F2_60439.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which type of medical imaging was utilized? \nOptions:\n(A) CT scan\n(B) MRI\n(C) X-ray\n(D) PET scan" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 27.875, "loss": 4.006831169128418 }, { "id": "identity_48653", "image": "FigureQA/images/15298.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Does the color Blue have a larger portion than the color Brown?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 27.921875, "loss": 3.0406394004821777 }, { "id": "identity_298249", "image": "IconQA/images/43748.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What action has been performed on this letter? \nChoices:\n(A) flip\n(B) turn\n(C) slide" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 24.71875, "loss": 4.9034037590026855 }, { "id": "identity_216082", "image": "Super-CLEVR/images/superCLEVR_new_000390.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles have blue windows?" }, { "from": "gpt", "value": "The answer is 1" } ], "clip_score": 23.484375, "loss": 2.6562981605529785 }, { "id": "identity_32558", "image": "DocVQA/images/xmvd0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the Office Telephone?" }, { "from": "gpt", "value": "The answer is 962-1561" } ], "clip_score": 16.953125, "loss": 2.396063804626465 }, { "id": "identity_199663", "image": "Super-CLEVR/images/superCLEVR_new_000382.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many vehicles in the image have wheels?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 27.078125, "loss": 2.382765769958496 }, { "id": "identity_224569", "image": "AI2D/images/1836.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the primary, secondary, and tertiary consumers in the food web, starting from the grasshopper?" }, { "from": "gpt", "value": "The answer is Grasshopper (primary), bird (secondary), owl (tertiary)" } ], "clip_score": 33.9375, "loss": 1.5195558071136475 }, { "id": "identity_76462", "image": "TQA/images/question_images/earth_moon_phases_6004.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What phase follows the Last Quarter in the lunar cycle?" }, { "from": "gpt", "value": "The answer is Waning Crescent" } ], "clip_score": 36.34375, "loss": 1.548662543296814 }, { "id": "identity_46328", "image": "DocVQA/images/prfv0228_2.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the full address of the City of Hope Medical Center?" }, { "from": "gpt", "value": "The answer is 1500 E. Duarte Road, Duarte, CA 91010" } ], "clip_score": 25.5625, "loss": 0.8436959981918335 }, { "id": "identity_48414", "image": "FigureQA/images/8851.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the pie chart?" }, { "from": "gpt", "value": "The answer is title" } ], "clip_score": 34.4375, "loss": 3.828742265701294 }, { "id": "identity_38178", "image": "A-OKVQA/images/388374.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What type of trees are growing in this location?\nChoices:\n(A) pine trees\n(B) palm trees\n(C) willow trees\n(D) birch trees" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 12.4375, "loss": 4.026093482971191 }, { "id": "identity_43336", "image": "DVQA/images/bar_train_00058756.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the accuracy of the \"size\" algorithm less than 4?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 30.125, "loss": 3.324906826019287 }, { "id": "identity_284667", "image": "UniGeo/images/3209.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: In the given diagram of a river embankment cross-section, with embankment height BC=8m and water-facing slope angle ∠BAC=30°, what is the length of AB?\nChoices:\n(A) 16m\n(B) 6√{3}m\n(C) 8√{3}m\n(D) 7√{3}m" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 30.03125, "loss": 2.4451353549957275 }, { "id": "identity_75389", "image": "ScienceQA/images/15915.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which continent is primarily covered by the outlined area on the map?" }, { "from": "gpt", "value": "The answer is Africa" } ], "clip_score": 21.15625, "loss": 3.0175397396087646 }, { "id": "identity_4630", "image": "IconQA/images/68460.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What number is shown?" }, { "from": "gpt", "value": "The answer is 54" } ], "clip_score": 19.3125, "loss": 2.374349594116211 }, { "id": "identity_220134", "image": "TextVQA/images/064dcf621a91d9ac.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the restaurant above McDonald's?" }, { "from": "gpt", "value": "The answer is HIMAGOYA" } ], "clip_score": 22.671875, "loss": 4.744145393371582 }, { "id": "identity_28358", "image": "Geometry3K/images/1034.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find the area of the shaded region. Round to the nearest tenth.\nChoices:\n(A) 7.1\n(B) 7.7\n(C) 21.9\n(D) 28.9" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 26.296875, "loss": 2.9571995735168457 }, { "id": "identity_42155", "image": "MapQA/images/map_9589.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the title of the map?" }, { "from": "gpt", "value": "The answer is The Percentage of candidates in the USA" } ], "clip_score": 37.1875, "loss": 1.9061179161071777 }, { "id": "identity_32267", "image": "TabMWP/images/13282.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Kelsey has $327. Does she have enough to buy a bearded dragon and a chameleon?\nChoices:\n(A) yes\n(B) no" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 31.4375, "loss": 3.885341167449951 }, { "id": "identity_298167", "image": "PMC-VQA/images/PMC7901204_Fig1_372857.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What were the findings of the non-contrast CT scan? \nChoices:\n(A) Left hyperdense middle cerebral artery sign\n(B) Right hyperdense middle cerebral artery sign\n(C) Both left and right hyperdense middle cerebral artery signs\n(D) No hyperdense middle cerebral artery sign." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 30.625, "loss": 4.294083595275879 }, { "id": "identity_148353", "image": "DocVQA/images/ftmf0024_3.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What companies are mentioned in the footnote?" }, { "from": "gpt", "value": "The answer is RJR, Beckel Cowen, Ramhurst, Smith Johnson Anderson & Carr, and Bob Burnette" } ], "clip_score": 24.90625, "loss": 4.087153911590576 }, { "id": "identity_197487", "image": "TextVQA/images/8994f0a839f29131.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of products are in the wooden crate?" }, { "from": "gpt", "value": "The answer is Jams and preserves" } ], "clip_score": 24.125, "loss": 2.730517864227295 }, { "id": "identity_260478", "image": "VQA2.0/images/515075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many players from each team are visible in the image, and what is the total number of players visible?" }, { "from": "gpt", "value": "The answer is 7 players" } ], "clip_score": 22.453125, "loss": 3.329540252685547 }, { "id": "identity_309880", "image": "DVQA/images/bar_train_00196591.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of people prefer the top choice?" }, { "from": "gpt", "value": "The answer is 90" } ], "clip_score": 23.21875, "loss": 2.475748300552368 }, { "id": "identity_288667", "image": "AI2D/images/871.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Based on the diagram provided, which of the following is identified as a food source for the hawk?\n(A) snake\n(B) duckweed\n(C) duck\n(D) snail" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 34.375, "loss": 3.3989098072052 }, { "id": "identity_65241", "image": "TQA/images/question_images/parts_skeleton_7261.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What bone is directly connected to the \"Humerus\"?" }, { "from": "gpt", "value": "The answer is Radius and Ulna" } ], "clip_score": 27.59375, "loss": 1.6655640602111816 }, { "id": "identity_202049", "image": "A-OKVQA/images/377589.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any animals visible in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 12.6953125, "loss": 3.134735107421875 }, { "id": "identity_280106", "image": "FigureQA/images/84644.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Dark Periwinkle the color with the smallest area under the curve?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 19.71875, "loss": 4.054495334625244 }, { "id": "identity_74344", "image": "AI2D/images/136.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many types of tides are depicted in the diagram?" }, { "from": "gpt", "value": "The answer is Two" } ], "clip_score": 28.921875, "loss": 4.34842586517334 }, { "id": "identity_279426", "image": "FigureQA/images/98525.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Is Peru the country with the highest value?\nChoices:\n(A) no\n(B) yes" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 20.265625, "loss": 4.250767707824707 }, { "id": "identity_336357", "image": "A-OKVQA/images/485483.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What does the number 84 indicate?\nChoices:\n(A) table number\n(B) store number\n(C) order number\n(D) price" }, { "from": "gpt", "value": "The answer is C" } ], "clip_score": 7.6953125, "loss": 4.223732948303223 }, { "id": "identity_302723", "image": "ScienceQA/images/15021.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which better describes this ecosystem?\nChoices:\n(A) It has warm, dry summers. It also has many different types of trees.\n(B) It has warm, wet summers. It also has only a few types of trees." }, { "from": "gpt", "value": "The answer is B" } ], "clip_score": 22.40625, "loss": 3.7068119049072266 }, { "id": "identity_242081", "image": "GeoQA+/images/4197.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线a∥b,∠1=85°,∠2=30°,求∠3和∠4的和是多少?" }, { "from": "gpt", "value": "The answer is 235°" } ], "clip_score": 30.40625, "loss": 1.923764944076538 }, { "id": "identity_304673", "image": "TQA/images/question_images/human_system_muscular_6156.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What are the muscles located at the base of this area called?\nChoices:\n(A) Teres\n(B) Deltoid\n(C) Triceps\n(D) Trapezius" }, { "from": "gpt", "value": "The answer is D" } ], "clip_score": 30.796875, "loss": 4.061558723449707 }, { "id": "identity_178037", "image": "IconQA/images/1594.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the shape is a pie, and each segment represents a slice, how many slices are not blue?" }, { "from": "gpt", "value": "The answer is 7" } ], "clip_score": 35.375, "loss": 2.416722059249878 }, { "id": "identity_196968", "image": "PMC-VQA/images/PMC8202118_f1_467793.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any indication of pathology in the image?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 27.578125, "loss": 2.8201916217803955 }, { "id": "identity_227004", "image": "ScienceQA/images/129.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Describe the life cycle of a pine tree starting from the seed stage and identify the role of male cones in this cycle" }, { "from": "gpt", "value": "The answer is Male cones produce pollen necessary for fertilization in pine trees" } ], "clip_score": 39.09375, "loss": 2.206899642944336 }, { "id": "identity_230635", "image": "AI2D/images/4010.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What are the states of matter for all the layers of the Earth from the crust to the inner core?" }, { "from": "gpt", "value": "The answer is Rigid, Plastic, Liquid, Rigid" } ], "clip_score": 33.90625, "loss": 1.6819517612457275 }, { "id": "identity_59212", "image": "FigureQA/images/77899.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of values for Tomato across the x-axis?" }, { "from": "gpt", "value": "The answer is 10" } ], "clip_score": 26.28125, "loss": 3.105069398880005 }, { "id": "identity_204967", "image": "TextVQA/images/f279bb5e48c9ea01.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is written on the box on the table?" }, { "from": "gpt", "value": "The answer is Coca-Cola" } ], "clip_score": 18.859375, "loss": 1.8523650169372559 }, { "id": "identity_107077", "image": "VQA-RAD/images/synpic47191.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Can we see any signs of free air in the abdominal cavity?" }, { "from": "gpt", "value": "The answer is No signs of free air in the abdominal cavity" } ], "clip_score": 24.0625, "loss": 1.7687506675720215 }, { "id": "identity_261676", "image": "UniGeo/images/498.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: If a sector has a radius of 30cm and a central angle of 120°, and it is used to create the lateral surface of a cone, what is the radius of the cone's base?\nChoices:\n(A) 10cm\n(B) 20cm\n(C) 10πcm\n(D) 20πcm" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.78125, "loss": 2.8954594135284424 }, { "id": "identity_129803", "image": "A-OKVQA/images/64551.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there any individual wearing a helmet in the image?" }, { "from": "gpt", "value": "The answer is No" } ], "clip_score": 20.109375, "loss": 3.08530855178833 }, { "id": "identity_181593", "image": "IconQA/images/72692.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If the dark line represents a piece of wood that needs to be cut into 3 equal pieces, how long would each piece be in millimeters?" }, { "from": "gpt", "value": "The answer is approximately 2.33 millimeters" } ], "clip_score": 34.53125, "loss": 1.90530264377594 }, { "id": "identity_231693", "image": "Super-CLEVR/images/superCLEVR_new_000718.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Identify all the vehicles in the image and determine the material of the vehicle that is the same color as the big dirtbike" }, { "from": "gpt", "value": "The answer is rubber" } ], "clip_score": 29.296875, "loss": 5.462851047515869 }, { "id": "identity_205142", "image": "VizWiz/images/VizWiz_train_00009981.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date displayed on the iPhone's screen?" }, { "from": "gpt", "value": "The answer is Sunday, October 14" } ], "clip_score": 31.671875, "loss": 1.7091621160507202 }, { "id": "identity_116419", "image": "TextVQA/images/3d8d4ecd1b90f548.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the shape of the object in the image?" }, { "from": "gpt", "value": "The answer is It is shaped like a claw or talon" } ], "clip_score": 25.390625, "loss": 2.785843849182129 }, { "id": "identity_267429", "image": "DocVQA/images/rrvw0217_6.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the publishing group that is referenced?" }, { "from": "gpt", "value": "The answer is THE PARTHENON PUBLISHING GROUP" } ], "clip_score": 23.109375, "loss": 1.1107181310653687 }, { "id": "identity_254359", "image": "DocVQA/images/rpfx0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the average index of \"High-priced foods\" for the years 1970, 1972, and 1974?" }, { "from": "gpt", "value": "The answer is 130.93" } ], "clip_score": 30.015625, "loss": 1.859972357749939 }, { "id": "identity_33376", "image": "PMC-VQA/images/PMC7906016_F3_374092.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: What diagnostic method was used to obtain the images in Case 2? \nChoices:\n(A) Magnetic Resonance Imaging \n(B) X-ray \n(C) Computed Tomography (CT) \n(D) Positron Emission Tomography (PET) " }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.171875, "loss": 4.1234130859375 }, { "id": "identity_65592", "image": "TQA/images/abc_question_images/earth_eclipses_10684.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What celestial body is represented by the blue circle labeled R?" }, { "from": "gpt", "value": "The answer is The Earth" } ], "clip_score": 27.34375, "loss": 3.25069260597229 }, { "id": "identity_52011", "image": "FigureQA/images/49381.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the pie chart does the Pale Green section represent?" }, { "from": "gpt", "value": "The answer is Approximately 25%" } ], "clip_score": 38.125, "loss": 2.0830700397491455 }, { "id": "identity_292664", "image": "DVQA/images/bar_train_00149932.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of units sold by the best-selling item in the entire chart?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 32.84375, "loss": 2.5882039070129395 }, { "id": "identity_152335", "image": "ScienceQA/images/20808.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total volume of the rocky planets in our solar system according to the given data?" }, { "from": "gpt", "value": "The answer is 2,240 billion km^3" } ], "clip_score": 30.34375, "loss": 1.695501446723938 }, { "id": "identity_170493", "image": "IconQA/images/38783.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What percentage of the stars are in the top row?" }, { "from": "gpt", "value": "The answer is 55.56%" } ], "clip_score": 27.96875, "loss": 1.667556643486023 }, { "id": "identity_227763", "image": "DocVQA/images/sxvw0217_7.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many articles are planned to be submitted to journals in Q3 2003, and what are their titles?" }, { "from": "gpt", "value": "The answer is Three articles" } ], "clip_score": 17.25, "loss": 4.483461380004883 }, { "id": "identity_39385", "image": "FigureQA/images/98139.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the maximum value on the y-axis for Medium Mint?" }, { "from": "gpt", "value": "The answer is 94" } ], "clip_score": 23.609375, "loss": 2.7439866065979004 }, { "id": "identity_28649", "image": "VQA2.0/images/517395.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the train?" }, { "from": "gpt", "value": "The answer is purple" } ], "clip_score": 29.984375, "loss": 2.770979166030884 }, { "id": "identity_78541", "image": "ScienceQA/images/20366.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the approximate ratio of rusty nails to non-rusty nails in the image?" }, { "from": "gpt", "value": "The answer is 1:10" } ], "clip_score": 30.640625, "loss": 1.8950241804122925 }, { "id": "identity_77384", "image": "ScienceQA/images/10824.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the mass difference between the particles in Sample A and Sample B?" }, { "from": "gpt", "value": "The answer is 16 u" } ], "clip_score": 34.4375, "loss": 2.6068012714385986 }, { "id": "identity_154739", "image": "AI2D/images/1988.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the primary source of energy for the food web shown in the image?" }, { "from": "gpt", "value": "The answer is Grass & Seeds" } ], "clip_score": 33.40625, "loss": 1.8984854221343994 }, { "id": "identity_190381", "image": "A-OKVQA/images/336077.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the jersey number of the batter?" }, { "from": "gpt", "value": "The answer is 5" } ], "clip_score": 22.21875, "loss": 2.6192312240600586 }, { "id": "identity_175700", "image": "IconQA/images/12831.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total number of points on all the stars combined?" }, { "from": "gpt", "value": "The answer is 45" } ], "clip_score": 25.96875, "loss": 1.9792251586914062 }, { "id": "identity_332341", "image": "DocVQA/images/rpfx0227_11.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the \"High-priced foods\" Index for this year?" }, { "from": "gpt", "value": "The answer is 112.1" } ], "clip_score": 25.046875, "loss": 1.6718488931655884 }, { "id": "identity_254231", "image": "DocVQA/images/tkww0217_9.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the total estimated budget for both the editorial development of paper consensus and the follow-up activities?" }, { "from": "gpt", "value": "The answer is $58,000.00" } ], "clip_score": 22.515625, "loss": 1.647255778312683 }, { "id": "identity_17331", "image": "Geometry3K/images/1583.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Find \\angle 6.\nChoices:\n(A) 110\n(B) 140\n(C) 150\n(D) 220" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 32.4375, "loss": 2.6069858074188232 }, { "id": "identity_318986", "image": "DocVQA/images/hjgx0227_4.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Who is the CHAIRMAN of this Committee?" }, { "from": "gpt", "value": "The answer is MARY J. PULLIAM" } ], "clip_score": 22.109375, "loss": 1.3504317998886108 }, { "id": "identity_96511", "image": "CLEVR-Math/images/CLEVR_train_053978.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there more blue objects or green objects?" }, { "from": "gpt", "value": "The answer is Equal" } ], "clip_score": 30.328125, "loss": 3.757722854614258 }, { "id": "identity_91710", "image": "Geometry3K/images/600.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of z?" }, { "from": "gpt", "value": "The answer is 37" } ], "clip_score": 30.515625, "loss": 2.6372408866882324 }, { "id": "identity_143182", "image": "PlotQA/images/32744.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many years did travel services have a higher balance of payments percentage than transport services?" }, { "from": "gpt", "value": "The answer is 0 years" } ], "clip_score": 27.671875, "loss": 2.567714214324951 }, { "id": "identity_243376", "image": "VQA2.0/images/341075.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: how many giraffes are there in total and how many of them are standing up?" }, { "from": "gpt", "value": "The answer is 5 giraffes in total, 4 standing up" } ], "clip_score": 30.0625, "loss": 1.5980976819992065 }, { "id": "identity_192929", "image": "PMC-VQA/images/PMC4751255_F1_1700.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is the abnormality seen in the FLAIR image located in the right or left hemisphere of the brain?" }, { "from": "gpt", "value": "The answer is Left hemisphere" } ], "clip_score": 31.5, "loss": 2.4419848918914795 }, { "id": "identity_217564", "image": "A-OKVQA/images/87649.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any people on the platform?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 18.328125, "loss": 3.2674241065979004 }, { "id": "identity_216484", "image": "A-OKVQA/images/187611.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of event is suggested by the banners and crowd?" }, { "from": "gpt", "value": "The answer is A parade or protest march" } ], "clip_score": 25.609375, "loss": 3.277177333831787 }, { "id": "identity_70902", "image": "AI2D/images/3461.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many pairs of wings does the grasshopper have?" }, { "from": "gpt", "value": "The answer is Two pairs" } ], "clip_score": 31.78125, "loss": 4.106410503387451 }, { "id": "identity_324241", "image": "DocVQA/images/phnm0227_1.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the date mentioned in this document?" }, { "from": "gpt", "value": "The answer is june 9th, 1981" } ], "clip_score": 24.375, "loss": 2.1212470531463623 }, { "id": "identity_196480", "image": "PMC-VQA/images/PMC6994512_Fig5_109337.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the structure labeled as \"FL\" in blue?" }, { "from": "gpt", "value": "The answer is Fascia Lata" } ], "clip_score": 23.96875, "loss": 2.1760005950927734 }, { "id": "identity_209096", "image": "A-OKVQA/images/177149.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many individuals are wearing head coverings?" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 18.6875, "loss": 2.198310136795044 }, { "id": "identity_96494", "image": "TabMWP/images/3820.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If one cage is removed, how many hedgehogs are left in 3 cages?" }, { "from": "gpt", "value": "The answer is 6" } ], "clip_score": 30.109375, "loss": 2.318591833114624 }, { "id": "identity_266132", "image": "ScienceQA/images/18884.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the correct option letter, e.g., A, B, C, D, at the end.\nQuestion: Which of the following correctly identifies the direction of thermal energy transfer during this time: from the surroundings to each vial or from each vial to the surroundings?\nChoices:\n(A) the surroundings . . . each vial\n(B) each vial . . . the surroundings" }, { "from": "gpt", "value": "The answer is A" } ], "clip_score": 29.296875, "loss": 3.981916904449463 }, { "id": "identity_318360", "image": "DocVQA/images/tkyg0227_10.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the label for the highest curve?" }, { "from": "gpt", "value": "The answer is ALL FOODS" } ], "clip_score": 19.0625, "loss": 3.7130537033081055 }, { "id": "identity_108424", "image": "PMC-VQA/images/PMC6191221_F4_382930.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Are there any areas in the image that show no color, indicating a lack of blood flow?" }, { "from": "gpt", "value": "The answer is Yes, there are grey areas" } ], "clip_score": 27.8125, "loss": 3.0865378379821777 }, { "id": "identity_254847", "image": "IconQA/images/88256.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What sequence of transformations (slide, turn, flip) can be applied to the letter on the left to obtain the letter on the right?" }, { "from": "gpt", "value": "The answer is flip" } ], "clip_score": 27.96875, "loss": 2.8001773357391357 }, { "id": "identity_79103", "image": "UniGeo/images/1308.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: In the given diagram, if ∠A = 30° and ∠PDC = 70°, what is the measure of ∠BPC?" }, { "from": "gpt", "value": "The answer is 60°" } ], "clip_score": 35.34375, "loss": 1.6873445510864258 }, { "id": "identity_86065", "image": "UniGeo/images/1850.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,已知点M是线段AB的中点,N是线段AM上的点,且满足AN:MN=1:2,若线段MN=6cm,求线段AN的长度为多少厘米?" }, { "from": "gpt", "value": "The answer is 3cm" } ], "clip_score": 24.90625, "loss": 1.9696186780929565 }, { "id": "identity_155982", "image": "ScienceQA/images/572.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which state is directly south of Kentucky?" }, { "from": "gpt", "value": "The answer is Tennessee" } ], "clip_score": 27.84375, "loss": 2.918330430984497 }, { "id": "identity_87779", "image": "Geometry3K/images/561.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: If x = 7√3, find the length of side c in the triangle" }, { "from": "gpt", "value": "The answer is 14" } ], "clip_score": 33.84375, "loss": 2.921628475189209 }, { "id": "identity_233891", "image": "GeoQA+/images/40.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,直线a∥b,∠1=130°,求∠2和∠3的和是多少度?" }, { "from": "gpt", "value": "The answer is 180°" } ], "clip_score": 29.359375, "loss": 1.509176254272461 }, { "id": "identity_120559", "image": "VQA-RAD/images/synpic29771.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is used to produce this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 28.90625, "loss": 2.8847429752349854 }, { "id": "identity_82854", "image": "GeoQA+/images/1904.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: 如图,△ABC内接于⊙O,∠A=74°,则∠BOC的度数等于()" }, { "from": "gpt", "value": "The answer is ∠BOC = 2 * 74° = 148°" } ], "clip_score": 34.125, "loss": 1.2500544786453247 }, { "id": "identity_259381", "image": "VQA-AS/images/4857.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What factors should be considered to determine if these two individuals can safely use the teeter-totter together, and what adjustments could be made to ensure a balanced and safe experience?" }, { "from": "gpt", "value": "The answer is Adjust seating positions and use safety gear for balance" } ], "clip_score": 32.25, "loss": 3.960322380065918 }, { "id": "identity_116036", "image": "PMC-VQA/images/PMC7319815_ytaa086-F1_200240.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What type of echocardiogram is depicted in the image?" }, { "from": "gpt", "value": "The answer is Color flow Doppler echocardiogram" } ], "clip_score": 35.53125, "loss": 2.12580943107605 }, { "id": "identity_299995", "image": "DVQA/images/bar_train_00134926.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the value of the largest individual bar in this chart?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 27.453125, "loss": 2.8740692138671875 }, { "id": "identity_105647", "image": "TextVQA/images/4e3732d946f67cae.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the name of the publisher as seen on the spine of the book?" }, { "from": "gpt", "value": "The answer is Macfadden Books" } ], "clip_score": 21.703125, "loss": 2.3911993503570557 }, { "id": "identity_252546", "image": "IconQA/images/60601.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: How many rows and columns of hearts are there, and what is the total number of hearts?" }, { "from": "gpt", "value": "The answer is 9" } ], "clip_score": 29.5, "loss": 3.9245998859405518 }, { "id": "identity_184158", "image": "VQA2.0/images/548020.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Is there a pedestrian crossing the street?" }, { "from": "gpt", "value": "The answer is Yes" } ], "clip_score": 20.59375, "loss": 2.9232656955718994 }, { "id": "identity_287942", "image": "VQA2.0/images/343185.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the color of the phone case?" }, { "from": "gpt", "value": "The answer is blue" } ], "clip_score": 14.8359375, "loss": 3.88702392578125 }, { "id": "identity_177346", "image": "TabMWP/images/26563.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What is the range of broken plates per shipment?" }, { "from": "gpt", "value": "The answer is 74" } ], "clip_score": 19.53125, "loss": 3.23858642578125 }, { "id": "identity_108145", "image": "TextVQA/images/fc3ca7ad572166dc.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What color is the text background on the left poster?" }, { "from": "gpt", "value": "The answer is Red" } ], "clip_score": 15.2109375, "loss": 3.7699508666992188 }, { "id": "identity_123339", "image": "PMC-VQA/images/PMC6873490_Fig1_72667.jpg", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: What imaging technique is depicted in this image?" }, { "from": "gpt", "value": "The answer is CT scan" } ], "clip_score": 30.453125, "loss": 3.011890411376953 }, { "id": "identity_154409", "image": "AI2D/images/4343.png", "conversations": [ { "from": "human", "value": "\nHint: Please answer the question and provide the final answer at the end.\nQuestion: Which citrus fruit in the image is typically the largest in size?" }, { "from": "gpt", "value": "The answer is Pummelo" } ], "clip_score": 29.65625, "loss": 2.752950668334961 } ]